---
title: |
       | Supplementary Materials for "The Effect of Legislature Size on Public Spending: A Meta-Analysis"
author:
- "Danilo Freire[^danilo]"
- "Umberto Mignozzetti[^umberto]"
- "Catarina Roman[^catarina]"
- "Huzeyfe Alptekin[^huzeyfe]"
date: \today
fontfamily: libertine
fontawesome: yes
fontsize: 11pt
monospace-url: yes
spacing: double
papersize: a4paper
bibliography: references.bib
biblio-style: apalike
always_allow_html: true
output:
  pdf_document:
    citation_package: natbib
    fig_caption: yes
    number_sections: yes
    toc: true
    keep_tex: no
    template: template.latex
---

[^danilo]: Senior Lecturer, School of Social and Political Sciences, University of Lincoln, <dfreire@lincoln.ac.uk>, <https://danilofreire.github.io>.

[^umberto]: Visiting Assistant Professor, Quantitative Theory and Methods Department, Emory University, <umberto.mignozzetti@emory.edu>, <http://umbertomig.com>. Corresponding author.

[^catarina]: PhD Student, University of California, San Diego, <acroman@ucsd.edu>, <http://catarinaroman.github.io>.

[^huzeyfe]: Independent Researcher, <huzeyfealptekin@gmail.com>.

\appendix

```{r, include=FALSE}
# Knitr options
knitr::opts_chunk$set(fig.pos = "H") # holds figure position
knitr::opts_chunk$set(echo = TRUE)

# Chunk hook: wrap the rendered chunk in the LaTeX font-size command given by
# the chunk option `size` (e.g. size = "small") and switch back to \normalsize
# afterwards. Chunks whose size is "normalsize" are returned unchanged.
def.chunk.hook <- knitr::knit_hooks$get("chunk")
knitr::knit_hooks$set(chunk = function(x, options) {
  x <- def.chunk.hook(x, options)
  size <- options$size
  # Scalar if/else instead of ifelse(): with size = NULL the comparison has
  # length zero and ifelse() would return logical(0) rather than the chunk
  # output, so an unset size is treated like "normalsize".
  if (is.null(size) || identical(size, "normalsize")) {
    return(x)
  }
  paste0("\n \\", size, "\n\n", x, "\n\n \\normalsize")
})
```

```{r, include=FALSE}
## Starting
set.seed(732578) # From random.org

# Required packages (installed from CRAN)
pkgs <- c("tidyverse", "meta", "metafor",
          "readxl", "devtools", "data.table",
          "knitr", "gridGraphics", "gridExtra",
          "ggpubr", "kableExtra", "magick",
          "stargazer", "pander", "broom")

# Packages installed from GitHub: package name -> repository
gh_pkgs <- c(compareGroups = "isubirana/compareGroups",
             dmetar = "MathiasHarrer/dmetar")

# Install a CRAN package only if it is not already available
install <- function(x) {
  if (!requireNamespace(x, quietly = TRUE)) {
    install.packages(x, dependencies = TRUE,
                     repos = "https://cran.us.r-project.org")
  }
}
invisible(lapply(pkgs, install))

# Install the GitHub packages only when they are missing, so that knitting
# does not rebuild them (and does not need network access) on every run.
for (pkg in names(gh_pkgs)) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    devtools::install_github(gh_pkgs[[pkg]])
  }
}

# Load packages. library() stops with an error when a package is missing,
# whereas require() would only return FALSE and let later code fail obscurely.
for (pkg in c(pkgs, names(gh_pkgs))) {
  library(pkg, character.only = TRUE)
}

# Broom the multi-level model
#
# Converts a fitted model into the long table consumed by the forest plots.
#
# Arguments:
#   mod       Fitted model exposing `yi`, `vi` and `slab`, and supported by
#             predict() (the callers in this file pass rma.mv fits).
#   subgroup  If TRUE, the summary part is a single "Subgroup Effect" row;
#             otherwise it is an "Overall Effect" row followed by a
#             "Prediction Interval" row (whose TE and seTE are NA).
#
# Returns a tibble with columns TE, seTE, studlab, lower, upper, group
# ("A" = individual estimates, "B" = summary rows) and studlab2, which is
# studlab plus "_<k>" where k counts repeated labels so that each row gets a
# distinct axis label.
broom_mod <- function(mod, subgroup = FALSE) {
  aux <- data.frame(predict(mod))

  if (subgroup) {
    summary_rows <- tibble(
      TE = aux$pred,
      seTE = aux$se,
      studlab = "Subgroup Effect",
      lower = aux$ci.lb,
      upper = aux$ci.ub,
      group = "B")
  } else {
    # Older metafor releases call the prediction-interval bounds cr.lb/cr.ub,
    # newer ones pi.lb/pi.ub. Accept either name: a missing column would be
    # NULL and tibble() would silently recycle the confidence bound into the
    # "Prediction Interval" row.
    pred_lb <- if (is.null(aux[["cr.lb"]])) aux[["pi.lb"]] else aux[["cr.lb"]]
    pred_ub <- if (is.null(aux[["cr.ub"]])) aux[["pi.ub"]] else aux[["cr.ub"]]
    if (is.null(pred_lb) || is.null(pred_ub)) {
      stop("predict() returned no prediction interval bounds ",
           "(expected columns cr.lb/cr.ub or pi.lb/pi.ub).", call. = FALSE)
    }
    summary_rows <- tibble(
      TE = c(aux$pred, NA),
      seTE = c(aux$se, NA),
      studlab = c("Overall Effect",
                  "Prediction Interval"),
      lower = c(aux$ci.lb, pred_lb),
      upper = c(aux$ci.ub, pred_ub),
      group = "B")
  }

  # Individual estimates with normal-approximation 95% intervals
  est <- as.numeric(mod$yi)
  se <- sqrt(mod$vi)
  study_rows <- tibble(
    TE = est,
    seTE = se,
    studlab = mod$slab,
    lower = est - 1.96 * se,
    upper = est + 1.96 * se,
    group = "A")

  bind_rows(study_rows, summary_rows) %>%
    group_by(studlab) %>%
    mutate(studlab2 = paste0(studlab, "_", seq_len(n()))) %>%
    ungroup()
}

# Estimation of the heterogeneous effects
#
# Fits the multilevel model once on all rows and once per distinct value of
# `hetvar`, and stacks the results for build_forest_het().
#
# Arguments:
#   dat      Data passed to rma.mv() as `data`.
#   yi, v    Effect sizes and their variances (passed as `yi` and `V`).
#   random   Random-effects formula passed to rma.mv().
#   slab     Study labels.
#   hetvar   Grouping vector; one sub-model is fitted per unique value.
#
# Returns a data.frame holding, for every subgroup, a header row (label =
# upper-cased subgroup value, all estimates NA) followed by the rows from
# broom_mod(..., subgroup = TRUE), and finally the "Overall Effect" and
# "Prediction Interval" rows of the full model (byvar = NA). `byvar` is
# upper-cased.
estim_het <- function(dat, yi, v, random, slab, hetvar = NULL) {
  if (is.null(hetvar)) {
    stop('Hetvar should be different than null.')
  }
  levshetvar <- unique(hetvar)
  fullmod <- rma.mv(yi = yi, V = v, random = random,
                    data = dat, slab = slab, method = 'REML',
                    test = 't', tdist = TRUE)

  # Collect the per-subgroup tables in a list and bind once at the end
  parts <- vector("list", length(levshetvar))
  k <- 0
  for (i in levshetvar) {
    partmod <- rma.mv(yi = yi, V = v,
                      random = random,
                      data = dat, slab = slab,
                      method = 'REML',
                      test = 't', tdist = TRUE,
                      subset = hetvar == i)
    partmod <- broom_mod(partmod, subgroup = TRUE)
    partmod$byvar <- i
    k <- k + 1
    # Header row first, so the subgroup name appears as a label in the plot
    parts[[k]] <- bind_rows(
      tibble(TE = NA, seTE = NA,
             studlab = toupper(i), lower = NA,
             upper = NA, group = "B", byvar = i),
      partmod
    )
  }

  aux <- data.frame(predict(fullmod))
  # Older metafor releases call the prediction-interval bounds cr.lb/cr.ub,
  # newer ones pi.lb/pi.ub. Accept either name: a missing column would be NULL
  # and tibble() would silently recycle the confidence bound instead.
  pred_lb <- if (is.null(aux[["cr.lb"]])) aux[["pi.lb"]] else aux[["cr.lb"]]
  pred_ub <- if (is.null(aux[["cr.ub"]])) aux[["pi.ub"]] else aux[["cr.ub"]]
  if (is.null(pred_lb) || is.null(pred_ub)) {
    stop("predict() returned no prediction interval bounds ",
         "(expected columns cr.lb/cr.ub or pi.lb/pi.ub).", call. = FALSE)
  }
  overall <- tibble(
    byvar = NA,
    TE = c(aux$pred, NA),
    seTE = c(aux$se, NA),
    studlab = c("Overall Effect", "Prediction Interval"),
    lower = c(aux$ci.lb, pred_lb),
    upper = c(aux$ci.ub, pred_ub),
    group = "B")

  res <- data.frame(bind_rows(bind_rows(parts), overall))
  res$byvar <- toupper(res$byvar)
  res
}

# Build plot function for forest plots
#
# Arguments:
#   mod    Fitted model. Objects whose first class is "rma.mv" are tidied with
#          broom_mod(); any other object must expose TE, seTE, studlab, lower,
#          upper and the pooled fields TE.random, seTE.random, lower.random,
#          upper.random, lower.predict and upper.predict.
#   capt   Caption text.
#   lsize  Base text size; axis labels are scaled from it.
#   ttl    Optional plot title.
#
# Returns a ggplot object: individual estimates in the top panel (black),
# "Overall Effect" and "Prediction Interval" in the bottom panel (dark red),
# with an x axis symmetric around zero.
build_forest <- function(mod, capt, lsize = 22, ttl = NULL) {
  if (class(mod)[1] == "rma.mv") {
    plot_data <- broom_mod(mod)
  } else {
    # Individual estimates
    study_rows <- tibble(
      TE = mod$TE,
      seTE = mod$seTE,
      studlab = mod$studlab,
      lower = mod$lower,
      upper = mod$upper,
      group = "A")
    # Pooled random-effects estimate and prediction interval
    pooled_rows <- tibble(
      TE = c(mod$TE.random, NA),
      seTE = c(mod$seTE.random, NA),
      studlab = c("Overall Effect",
                  "Prediction Interval"),
      lower = c(mod$lower.random,
                mod$lower.predict),
      upper = c(mod$upper.random,
                mod$upper.predict),
      group = "B")
    # studlab2 makes repeated labels unique; the suffix is removed again by
    # the y-axis label function below
    plot_data <- bind_rows(study_rows, aux = pooled_rows) %>%
      group_by(studlab) %>%
      mutate(studlab2 = paste0(studlab, "_", seq_len(n()))) %>%
      ungroup()
  }

  # Half-width of the symmetric x axis
  half_width <- max(abs(c(plot_data$lower, plot_data$upper)))

  # Drops the "_<k>" suffix added to studlab2
  drop_suffix <- function(x) {
    str_replace(x, "_[0-9]*$", "")
  }

  forest_theme <- theme_minimal() %+replace%
    theme(strip.text.y = element_blank(),
          legend.position = "none",
          axis.text.y = element_text(size = .8 * lsize,
                                     hjust = 1),
          axis.text.x = element_text(size = .6 * lsize,
                                     hjust = 1.1),
          plot.caption = element_text(size = lsize),
          plot.title.position = "plot",
          plot.title = element_text(hjust = 0.5,
                                    face = "bold",
                                    margin = margin(0, 0, 10, 0)),
          panel.grid.major = element_blank())

  ggplot(plot_data,
         aes(y = reorder(studlab2, TE),
             x = TE, xmin = lower, xmax = upper)) +
    geom_point(aes(color = group)) +
    geom_errorbarh(aes(color = group),
                   height = 0.1) +
    scale_color_manual(values = c("#000000", "#8b0000")) +
    scale_x_continuous(limits = c(-1.1 * half_width, 1.1 * half_width)) +
    scale_y_discrete(labels = drop_suffix) +
    geom_vline(xintercept = 0,
               color = "#000000", linetype = "dashed") +
    labs(x = "",
         y = "",
         caption = capt,
         title = ttl) +
    facet_grid(group~., scales = "free", space = "free") +
    forest_theme
}

# Build forest plot for heterogeneous analysis
#
# Arguments:
#   dat     Data passed to estim_het(); the random-effects structure
#           ~ 1 | id_level1/id_level2 is fixed here, so `dat` must contain
#           the columns id_level1 and id_level2.
#   coef    Effect sizes (passed to estim_het() as `yi`).
#   v       Variances of the effect sizes.
#   slab    Study labels.
#   capt    Caption text.
#   lsize   Base text size; axis labels are scaled from it.
#   ttl     Optional plot title.
#   hetvar  Grouping vector; one facet is drawn per subgroup.
#
# Returns a ggplot object with one facet per subgroup plus the overall rows.
build_forest_het <- function(dat, coef, v, slab, capt, lsize = 22, ttl = NULL, hetvar = NULL) {
  mod2 <- estim_het(dat = dat,
                    yi = coef,
                    v = v,
                    random = ~ 1 | id_level1/id_level2,
                    slab = slab,
                    hetvar = hetvar)
  mod2 <- data.frame(mod2)
  mod2$byvar <- toupper(mod2$byvar)

  # Ordering key: shifting the subgroup summaries by -100 sorts them below
  # the individual estimates of their facet
  TEaux <- mod2$TE
  is_subgroup <- mod2$studlab == 'Subgroup Effect'
  TEaux[is_subgroup] <- TEaux[is_subgroup] - 100

  # Graph limits. The subgroup header rows carry NA bounds, so NAs must be
  # dropped; otherwise limg is NA and the symmetric limits below are never
  # applied.
  limg <- max(abs(c(mod2$lower, mod2$upper)), na.rm = TRUE)

  # Build plot
  p <- mod2 %>%
    ggplot(aes(y = reorder(studlab, TEaux),
               x = TE, xmin = lower, xmax = upper)) +
    geom_point(aes(color = group)) +
    geom_errorbarh(aes(color = group),
                   height = 0.1) +
    scale_color_manual(values = c("#000000", "#8b0000")) +
    scale_x_continuous(limits = c(-1.1 * limg, 1.1 * limg)) +
    scale_y_discrete(
      labels = function(x)
        str_replace(x, "_[0-9]*$", "")) +
    geom_vline(xintercept = 0,
               color = "#000000", linetype = "dashed") +
    labs(x = "",
         y = "") +
    facet_grid(byvar~., scales = "free", space = "free") +
    labs(caption = capt,
         title = ttl) +
    theme_minimal() %+replace%
    theme(strip.text.y = element_blank(),
          legend.position = "none",
          axis.text.y = element_text(size = .8 * lsize,
                                     hjust = 1),
          axis.text.x = element_text(size = .6 * lsize,
                                     hjust = 1.1),
          plot.caption = element_text(size = lsize),
          plot.title.position = "plot",
          plot.title = element_text(hjust = 0.5,
                                    face = "bold",
                                    margin = margin(0, 0, 10, 0)),
          panel.grid.major = element_blank())
  return(p)
}

# Render a compareGroups 'createTable' object as a knitr::kable() table that
# is further styled with kableExtra helpers.
# NOTE(review): this looks like a locally modified copy of compareGroups'
# export2md() -- confirm against the package source before editing.
#
# Arguments (as used in the body below):
#   x                  object of class 'createTable' ('cbind.createTable' is
#                      rejected).
#   which.table        "descr" or "avail" (matched with charmatch()).
#   nmax               forwarded to prepare(); controls the "N=" row.
#   header.labels      forwarded to compareGroups:::prepare() ("descr" only).
#   caption            character caption; a default is built when NULL.
#   format             "html", "latex" or "markdown"; detected through knitr
#                      when the argument is not supplied.
#   width              width of the first column (Inf leaves it untouched).
#   strip, first.strip, background
#                      background striping applied with row_spec().
#   size, position     passed to kable_styling() as font_size / position.
#   landscape          if TRUE the table is passed through landscape().
#   header.background, header.color
#                      header row styling (html output only).
#   ...                forwarded to knitr::kable().
#
# Returns the styled kable object.
#
# In the "descr" branch the table is reshaped before rendering: the third
# column is dropped, the second column is renamed 'Extended Sample', and the
# remaining columns are reordered to c(1, 3, 2).
export2md2<-function(x, which.table="descr", nmax=TRUE, header.labels=c(),
                    caption=NULL, format="html", width=Inf,
                    strip=FALSE, first.strip=FALSE, background="#D2D2D2",
                    size=NULL, landscape=FALSE,
                    header.background=NULL, header.color=NULL,
                    position="center", ...){

  # Remove leading and trailing spaces
  trim <- function(x){
    x <- gsub("^[ ]+","",x)
    x <- gsub("[ ]+$","",x)
    x
  }

  # Local helper that builds the character matrices for the descriptives
  # table (table1) and the available-data table (table2).
  # Only the "avail" branch below calls this local copy; the "descr" branch
  # calls compareGroups:::prepare() instead.
  prepare <- function (x, nmax, header.labels)
    {

        # x <- tab[1:3]
        # nmax <- TRUE
        # header.labels <- c()
        # names(attributes(x))

        show.all <- attr(x, "show.all")
        show.descr <- attr(x, "show.descr")
        groups <- attr(x, "groups")
        ny <- attr(x, "ny")
        all.last <- attr(x, "all.last")

        varnames <- attr(x, "varnames")
        nr <- attr(x, "nr")
        desc <- x$desc
        avail <- x$avail
        nmax.pos <- attr(x, "nmax.pos")
        # Columns of `avail` from which the maximum N per column is taken
        nmax.avail.pos <- NULL
        if (length(nmax.pos[[1]]) == 0 & length(nmax.pos[[2]]) == 0) nmax.avail.pos <- integer(0)
        if (length(nmax.pos[[1]]) == 0 & length(nmax.pos[[2]]) > 0) nmax.avail.pos <- nmax.pos[[2]] + 1
        if (length(nmax.pos[[1]]) > 0 & length(nmax.pos[[2]]) == 0) nmax.avail.pos <- 1
        if (length(nmax.pos[[1]]) > 0 & length(nmax.pos[[2]]) > 0) nmax.avail.pos <- c(1, nmax.pos[[2]])
        if (length(nmax.avail.pos) > 0 && nmax) {
            Nmax <- apply(avail[, nmax.avail.pos, drop = FALSE],2, function(x) max(as.double(x)))
        } else {
            Nmax <- NULL
            nmax <- FALSE
        }

        dd.pos <- attr(x, "dd.pos")
        j <- 1
        table1 <- NULL
        if (!is.null(attr(x, "caption")))
            cc <- character(0)
        # Stack the rows of every variable; variables with more than one row
        # get an extra heading row "<variable>:" with the remaining row names
        # shown without the variable name.
        for (i in 1:length(varnames)) {
            if (nr[i] == 1) {
                t.i <- desc[j, , drop = FALSE]
            } else {
                t.i <- rbind(rep(NA, ncol(desc)), desc[j:(j + nr[i] -
                                                              1), , drop = FALSE])
                rownames(t.i)[1] <- paste(varnames[i], ":", sep = "")
                rownames(t.i)[-1] <- sub(varnames[i], "", rownames(t.i)[-1], fixed = TRUE)
                rownames(t.i)[-1] <- sub(": ", "    ", rownames(t.i)[-1])
                if (length(dd.pos) < ncol(t.i)) {
                    # Move the values of the columns not listed in dd.pos from
                    # the first category row up to the heading row
                    t.i[1, -dd.pos] <- t.i[2, -dd.pos]
                    t.i[2, -dd.pos] <- NA
                }
            }
            table1 <- rbind(table1, t.i)
            j <- j + nr[i]
            # Per-row group caption: the label on the first row of the
            # variable, "" on the others
            if (!is.null(attr(x, "caption"))) {
                if (attr(x, "caption")[[i]] == "")
                    cc <- c(cc, rep("", NROW(t.i)))
                else cc <- c(cc, attr(x, "caption")[[i]], rep("",
                                                              NROW(t.i) - 1))
            }
        }
        if (ncol(table1) == 0) table1 <- table1[-1, ]
        # Prepend the header row (and the "N=" row when nmax is TRUE)
        if (nmax) table1 <- rbind(colnames(table1), c(paste("N=", Nmax, sep = ""), rep("", ncol(table1) - length(Nmax))), table1) else table1 <- rbind(colnames(table1), table1)
        table1 <- ifelse(is.na(table1), "", table1)
        # Six unnamed header labels are interpreted in this fixed order
        if (length(header.labels)==6 && is.null(names(header.labels))){
            names(header.labels)<-c("all","p.overall","p.trend","ratio","p.ratio","N")
        }
        # Replace the default header cells by the user-supplied labels
        if ("all"%in%names(header.labels)){
            ww.all<-grep("^\\[ALL\\]",trim(table1[1,]))
            if (length(ww.all)>0){
                ww.all<-ww.all[1]
                table1[1,ww.all]<-header.labels["all"]
            }
        }
        if ("p.overall"%in%names(header.labels)){
            ww.p.overall<-which(table1[1,]=="p.overall")
            if (length(ww.p.overall)>0){
                ww.p.overall<-rev(ww.p.overall)[1]
                table1[1,ww.p.overall]<-header.labels["p.overall"]
            }
        }
        if ("p.trend"%in%names(header.labels)){
            ww.p.trend<-which(table1[1,]=="p.trend")
            if (length(ww.p.trend)>0){
                ww.p.trend<-rev(ww.p.trend)[1]
                table1[1,ww.p.trend]<-header.labels["p.trend"]
            }
        }
        if ("ratio"%in%names(header.labels)){
            ww.ratio<-which(table1[1,]%in%c("OR","HR"))
            if (length(ww.ratio)>0){
                ww.ratio<-rev(ww.ratio)[1]
                table1[1,ww.ratio]<-header.labels["ratio"]
            }
        }
        if ("p.ratio"%in%names(header.labels)){
            ww.p.ratio<-which(table1[1,]=="p.ratio")
            if (length(ww.p.ratio)>0){
                ww.p.ratio<-rev(ww.p.ratio)[1]
                table1[1,ww.p.ratio]<-header.labels["p.ratio"]
            }
        }
        if ("N"%in%names(header.labels)){
            ww.N<-which(table1[1,]=="N")
            if (length(ww.N)>0){
                ww.N<-rev(ww.N)[1]
                table1[1,ww.N]<-header.labels["N"]
            }
        }
        table1 <- apply(table1, 2, format, justify = "centre")
        colnames(table1) <- rep("", ncol(table1))



        # Available-data table: second element of x, with its column names
        # as the first row
        table2 <- x[[2]]
        table2 <- as.matrix(table2)
        table2 <- ifelse(is.na(table2), "", table2)
        table2 <- rbind(colnames(table2), table2)
        table2 <- apply(table2, 2, format, justify = "centre")
        colnames(table2) <- rep("", ncol(table2))

        # rearrange table 1 and table 2 by putting all column after descriptives by groups.
        if (all.last & show.all & show.descr & groups){
            table1[,1:(ny+1)] <- table1[,c(2:(ny+1),1)]
        }
        if (all.last){
            table2[,1:(ny+1)] <- table2[,c(2:(ny+1),1)]
        }

        # out
        out <- list(table1 = table1, table2 = table2)
        if (!is.null(attr(x, "caption"))) attr(out, "cc") <- cc
        attr(out, "nmax") <- nmax
        # "nr" attribute: one 0/1 flag per table row, alternating between
        # consecutive variables (read by the striping code further down)
        nr <- ifelse(nr>1, nr+1, nr)
        nr <- cbind(nr, rep(0:1, length(nr))[1:length(nr)])
        nr <- unlist(apply(nr, 1, function(x) rep(x[2],x[1])))
        attr(out, "nr") <- nr
        out

    }

  # compiled.format <- try(rmarkdown::all_output_formats(knitr::current_input())[1],silent=TRUE)
  #
  # if (inherits(compiled.format, "try-error") || is.null(compiled.format)){
  #   warning("you are using export2md out of Rmarkdown context...")
  # } else {
  #   if (compiled.format%in%c("html_document","ioslides_presentation","slidy_presentation")) format <- "html"
  #   if (compiled.format%in%c("pdf_document","beamer_presentation")) format <- "latex"
  #   if (compiled.format=="word_document") format <- "markdown"
  # }

  # Detect the output format only when the caller did not pass `format`
  if (missing(format)){
    format <- NA
    if (!interactive()){ # execute inside Rmarkdown
      if (knitr::is_html_output()) format="html"
      if (knitr::is_latex_output()) format="latex"
      if (!knitr::is_html_output() & !knitr::is_latex_output()) format="markdown"
      # NOTE(review): one of the three assignments above always fires, so this
      # fallback looks unreachable -- confirm before relying on it.
      if (is.na(format)){
        warning("Unable to identify format -> HTML assigned.")
        format <- "html"
      }
    } else {# interactive session, i.e. called outside Rmarkdown
      warning("You are calling export2md outside Rmarkdown without specifying format -> html format is assigned")
      format <- "html"
    }
  }

  # NOTE(review): `extras` is not read anywhere below.
  extras <- list(...)
  if (!inherits(x, "createTable"))
    stop("x must be of class 'createTable'")
  if (inherits(x, "cbind.createTable"))
    stop("x cannot be of class 'cbind.createTable'")
  # ww is 1 for the descriptives table and 2 for the available-data table
  ww <- charmatch(which.table, c("descr", "avail"))
  if (is.na(ww))
    stop(" argument 'which.table' must be either 'descr' or 'avail'")

  if (attr(x,"groups")){
    y.name.label<-attr(x,"yname")
  }

  # Default captions when none is supplied.
  # NOTE(review): "Missingess table" below looks like a typo for
  # "Missingness table"; it is a runtime string and is left unchanged here.
  if (!is.null(caption)){
    if (!is.character(caption))
      stop(" argument 'caption' must be a character'")
  } else {
    if (ww==1){
      if (attr(x,"groups"))
        if (inherits(x,"missingTable"))
          caption<-paste("Missingness table by groups of `",y.name.label,"'",sep="")
      else
        caption<-paste("Summary descriptives table by groups of `",y.name.label,"'",sep="")
      else
        if (inherits(x,"missingTable"))
          caption<-"Missingess table"
        else
          caption<-"Summary descriptives table"
    }
    if (ww==2){
      if (attr(x,"groups"))
        caption<-paste("Available data by groups of `",y.name.label,"'",sep="")
      else
        caption<-"Available data"
    }
  }
  # The descriptives table is built by the package-internal prepare(), not by
  # the local copy defined above
  pp <- compareGroups:::prepare(x, nmax = nmax, header.labels)
  #pp <- prepare(x, nmax = nmax, header.labels)
  cc <- unlist(attr(pp, "cc"))
  if (ww %in% c(1)) {
    table1 <- pp[[1]]
    # NOTE(review): `ii` is not read anywhere below.
    ii <- ifelse(rownames(table1)[2] == "", 2, 1)
    table1 <- cbind(rownames(table1), table1)
    align <- c("l", rep("c", ncol(table1)))
    table1[1, 1] <- " "
    # Promote the first row to column names, then drop it
    colnames(table1) <- table1[1, ]
    colnames(table1)[-1] <- trim(colnames(table1)[-1])
    table1 <- table1[-1, , drop = FALSE]
    table1[,2:ncol(table1)] <- apply(table1[,-1,drop=FALSE],2,trim)
    #table1 <- table1[-2]
    #names(table1)[1] <- 'Extended Sample'
    # N in the second row
    # Drop the third column, rename the second one and swap the two columns
    # that remain after the row labels
    table1 <- table1[,-3]
    colnames(table1)[2] <- 'Extended Sample'
    table1 <- table1[,c(1,3,2)]
    # TRUE when the first body row holds the "N=..." counts
    n.exists <- nrow(table1) > 1 && length(grep("^N=", trim(table1[1, 2])))
    # Empty cells of the last column are replaced by \vphantom{} for striped
    # latex output; the escaped form is restored after kable_styling() below
    if (format=="latex" & strip)
      table1[((1+n.exists):nrow(table1)),ncol(table1)] <- ifelse(table1[((1+n.exists):nrow(table1)),ncol(table1)]=="", "\\vphantom{}", table1[((1+n.exists):nrow(table1)),ncol(table1)])
    if (format=="latex") caption <- gsub("%","\\\\%",caption)
    ans <- knitr::kable(table1, align = align, row.names = FALSE, caption=caption[1], format=format,
                        booktabs=format=="latex", longtable=TRUE, linesep="", ...)
    # Indent the rows whose label starts with a space (category rows)
    ans <- add_indent(ans, grep("^ ",table1[,1]))
    if (width!=Inf) ans <- column_spec(ans, 1, width = width)
    # groups
    # Walk through the per-row captions: a non-empty entry starts a group, and
    # the run of "" entries that follows is closed with group_rows().
    # NOTE(review): `inici` and `final` are assigned but never read; if the
    # first entry of cc is "", cc[cci-1] is cc[0] and the `if` condition has
    # length zero -- confirm that cc always starts with a label.
    if (!is.null(cc)){
      for (cci in 1:length(cc)){
        if (cc[cci]!=""){
          group.label <- cc[cci]
          inici <- 0
          final <- 0
        } else {
          if (cc[cci-1]!="")
            group.begin <- cci-1
          if (cci==length(cc) || cc[cci+1]!=""){
            group.end <- cci
            ans <- group_rows(ans, group.label, group.begin+n.exists, group.end+n.exists)
          }
        }
      }
    }
    if (strip){
      nr <- attr(pp, "nr")
      ans <- row_spec(ans, which(nr==!first.strip)+n.exists, background = background)
    }
    if (n.exists){
      ans <- row_spec(ans, 1, hline_after=TRUE)
    }

    if (landscape) ans <- landscape(ans)
    if (format=="latex"){
      ans <- kable_styling(ans, latex_options = c("repeat_header"), font_size=size, position=position)
      #if (n.exists) ans <- gsub("\\\\midrule", "", ans) # remove lines after N
      if (n.exists) ans <- gsub("\\\\midrule\n\\\\endfirsthead", "\\\\endfirsthead", ans) # remove lines after N
      if (strip) ans <- gsub("\\textbackslash{}vphantom\\{\\}", "\\vphantom{}", ans, fixed=TRUE)
    }
    if (format=="html"){
      ans <- kable_styling(ans, bootstrap_options=c(if (!strip) "striped" else NULL, "condensed"), full_width=FALSE, font_size=size, position=position)
      ans <- row_spec(ans, 0, background=header.background, color=header.color)
      ans <- row_spec(ans, if (sum(unlist(attr(x, "nmax.pos")))>0) 1 else 0, italic=sum(unlist(attr(x, "nmax.pos")))>0, extra_css = "border-bottom: 1px solid grey")
    }
    return(ans)
  }
  if (ww %in% c(2)){
    # The available-data table comes from the local prepare() defined above
    # table2 <- compareGroups:::prepare(x, nmax = nmax, c())[[2]]
    table2 <- prepare(x, nmax = nmax, c())[[2]]
    table2 <- cbind(rownames(table2), table2)
    if (!is.null(attr(x, "caption"))) {
      cc <- unlist(attr(x, "caption"))
      table2[, 1] <- paste("    ", table2[, 1])
    }
    table2[1, 1] <- " "
    align <- c("l", rep("c", ncol(table2)))
    colnames(table2)[-1] <- trim(table2[1, -1])
    table2 <- table2[-1, ,drop=FALSE]
    ans <- knitr::kable(table2, align = align, row.names = FALSE, caption=caption[1], format=format, booktabs=format=="latex", longtable=TRUE, ...)
    # ans <- knitr::kable(table2, align = align, row.names = FALSE, caption=caption[1], format=format, booktabs=format=="latex")
    # groups
    # Same grouping walk as in the "descr" branch, without the N-row offset.
    # NOTE(review): the same cc[cci-1] caveat applies when cc starts with "".
    if (!is.null(cc)){
      for (cci in 1:length(cc)){
        if (cc[cci]!=""){
          group.label <- cc[cci]
          inici <- 0
          final <- 0
        } else {
          if (cc[cci-1]!="")
            group.begin <- cci-1
          if (cci==length(cc) || cc[cci+1]!=""){
            group.end <- cci
            ans <- group_rows(ans, group.label, group.begin, group.end)
          }
        }
      }
    }
    ans <- add_indent(ans, integer())
    # Stripe every other row
    if (strip) ans <- row_spec(ans, which(rep(0:1, nrow(table2))[1:nrow(table2)]==!first.strip), background = background)
    if (width!=Inf) ans <- column_spec(ans, 1, width = width)
    if (landscape) ans <- landscape(ans)
    if (format=="latex"){
      ans <- kable_styling(ans, latex_options = c("repeat_header"), font_size = size, position=position)
    }
    if (format=="html"){
      ans <- kable_styling(ans, bootstrap_options=c(if (!strip) "striped" else NULL, "condensed"), full_width = FALSE, font_size = size, position=position)
      ans <- row_spec(ans, 0, background=header.background, color=header.color)
      ans <- row_spec(ans, 0, italic=FALSE, extra_css = "border-bottom: 1px solid grey")
    }

    return(ans)
  }
}

# Load datasets
# Restores the R objects saved in dataCoefs.RData into the current
# environment. The path is relative: one directory up, then dataset/.
# NOTE(review): the names of the restored objects are not visible here --
# confirm which objects the later chunks expect.
load("../dataset/dataCoefs.RData")
```

\newpage

# A Brief Introduction to the "Law of _1/n_" and the "Reverse Law of _1/n_"

Here we present the intuition behind the "law of $1/n$", as well as the
alternative "reverse law of $1/n$" proposed by @primo2008distributive.

In their seminal paper, @weingast1981political argue that a high number of
legislators will increase public spending beyond the optimal economic
benchmark. They suggest that politicians have an incentive to over-provide
concentrated benefits to their constituencies, spreading the costs across all
constituencies through generalised taxation. The corollary of their model is
that larger legislatures generate more public spending.

In their model, every local public goods project of size $x$ generates a
concave benefit $b(x)$, and there are convex costs associated with the
project\footnote{The concavity assumption implies that $b'>0$ and $b''<0$.
Similarly, the convexity assumption implies that $c'>0$ and $c''>0$. We assume that
the derivatives are well defined throughout the analysis. Moreover, we drop the
constituency index to make the model more intuitive, so the reader should
assume that we always employ a symmetric Nash equilibrium.}. The first type of
cost, $c_1(x)$, comprises the expenses within the constituency (e. g., hiring a
local company for the project). The second type of cost, $c_2(x)$, captures the
expenses outside the constituency (e. g., hiring a company from another state).
Finally, the third cost, $c_3(x)$, captures the externalities generated by the
project (e. g., how much prices shift because local economic factors are being
used to provide the project). The total cost is equal to $c(x) = c_1(x) +
c_2(x) + c_3(x)$. The tax burden generated by the project is equal to $T(x) =
c_1(x) + c_2(x)$.

Projects are economically efficient when the marginal costs are equal to the
marginal benefits of the project size. This leads to the economically optimal
project size, $x^E$, which is defined as $b'(x^E) - c_1'(x^E) - c_2'(x^E) -
c_3'(x^E) = 0$. However, the projects that are actually implemented have a
different structure. First, assume that the constituency in question has a tax
burden $t = 1/n$, where $n$ represents the number of constituencies. Also,
suppose that benefits are distorted by the fact that costs within the
constituency ($c_1(x)$) become investments in local firms. Therefore, the costs
and benefits of implementing a project with size $x$ have the following
structure:

\[
N(x) = b(x) + c_1(x) - \frac{1}{n}[c_1(x) + c_2(x)] - c_3(x)
\]

Differentiating $N(x)$ with respect to $x$ gives us the first order condition
for an optimal project implementation.

\[
b'(x) + c_1'(x) - \dfrac{1}{n}[c_1'(x) + c_2'(x)] - c_3'(x) = 0
\]

Totally differentiating the first order condition with respect to $n$ gives us the following:

\[
b''(x)\dfrac{dx}{dn} + c_1''(x)\dfrac{dx}{dn} - \dfrac{1}{n}[c_1''(x) + c_2''(x)]\dfrac{dx}{dn} + \dfrac{1}{n^2}[c_1'(x) + c_2'(x)] - c_3''(x)\dfrac{dx}{dn} = 0
\]

And rearranging the terms, we find the following expression for $\dfrac{dx}{dn}$:

\[
\dfrac{dx}{dn} = -\dfrac{n^{-2}[c_1'(x) + c_2'(x)]}{b''(x) + c_1''(x)-n^{-1}[c_1''(x) + c_2''(x)] - c_3''(x)}
\]

Note that the numerator is always positive, as the marginal investment costs
inside and outside the district increase according to the project size. The
"law of $1/n$" holds when the denominator is negative. $b''(x) < 0$ by
assumption, and $-n^{-1}[c_1''(x) + c_2''(x)] - c_3''(x) < 0$ as $c_1'', c_2'',
c_3'' > 0$. Then, the _law of 1/n_ is true when $c_1''(x)$ is smaller than
$n^{-1}[c_1''(x) + c_2''(x)] + c_3''(x) - b''(x)$.

As the above condition suggests, the "law of $1/n$" holds only in specific
situations. @primo2008distributive advance that theory by considering other
situations where the "law" may not hold, and they also argue that there are
cases in which a "reverse law of $1/n$" may exist, that is, where larger
legislatures may lead to lower public expenditures.

Following @primo2008distributive, let $n$ be the number of districts, $m$ the
number of citizens in each district, and $nm$ the total population in the
country. Consider a local public good that generates a per capita benefit of
$b(x, m) = x^\alpha m^{\beta-1}$ according to size $x$, where $\beta$ is the
degree of congestion of the public good, that is, how much the addition of
individuals reduce the benefits for other individuals (note that the lower the
$\beta$, the higher the congestion). In terms of costs, consider a linear cost
function $C(x) = x$, and in terms of taxation, assume that the people in the
district pay both local and federal government taxes. The degree that taxes are
shared is denoted by $s$. Moreover, there is a deadweight loss of the taxes
$\theta \geq 1$. The tax then becomes: $t = \left(\dfrac{(n - ns + s)x + (ns -
s)X}{nm}\right)^\theta$. Therefore, the citizens receive the following net
benefit of a project with size $x$:

\[
\pi = x^\alpha m^{\beta-1} - \left(\dfrac{(n - ns + s)x + (ns - s)X}{nm}\right)^\theta
\]

Maximising this function, and solving for the symmetric Nash equilibrium where
$x = X$ for all projects, we find the following optimal project size:

\[
x^* = \left[\left(\dfrac{\alpha}{\theta}\right)^{\dfrac{1}{\theta-\alpha}}\right]\left[\left(nm\right)^{\dfrac{\beta+\theta-1}{\theta - \alpha}}\right]\left[\left( \dfrac{n^{2-\beta-\theta}}{n-ns+s}\right)^{\dfrac{1}{\theta-\alpha}}\right]
\]

And in the graphs below, we show simulations for $n$ varying from 10 to 20,
holding constant $\beta = 0.35$, $\theta \in \{0.5, 0.8\}$, $s = 0.5$, $m =
10$, and $\alpha = 0.7$.

```{r, echo=FALSE, cache=TRUE, fig.align='center'}
# Equilibrium project size x* from the closed-form expression above.
#   x     - legislature size (the number of districts, n)
#   beta  - congestion parameter of the public good
#   theta - deadweight-loss exponent of taxation
#   alpha - exponent on project size in the benefit function
#   s     - degree of tax sharing
#   m     - number of citizens per district
# Returns the product of the three bracketed factors of x*; vectorised over x.
xstar <- function(x, beta, theta, alpha = 0.7, s = 0.5, m = 10) {
  gap <- theta - alpha # denominator shared by every exponent
  ratio_term <- (alpha / theta)^(1 / gap)
  population_term <- (x * m)^((beta + theta - 1) / gap)
  sharing_term <- ((x^(2 - beta - theta)) / (x - x * s + s))^(1 / gap)
  ratio_term * population_term * sharing_term
}

# First panel: theta = 0.8, above xstar's default alpha of 0.7.
curve(
  xstar(x, beta = 0.35, theta = 0.8),
  from = 10, to = 20,
  xlab = "Legislature Size",
  ylab = "Project Size",
  main = "Increasing project size when varying legislature size \n (high deadweight losses)"
)

# Second panel: theta = 0.5, below xstar's default alpha of 0.7.
curve(
  xstar(x, beta = 0.35, theta = 0.5),
  from = 10, to = 20,
  xlab = "Legislature Size",
  ylab = "Project Size",
  main = "Decreasing project size when varying legislature size \n (low deadweight losses)"
)
```

Thus, we see that both the "law of $1/n$" and its reverse formulation are
equally plausible. Here we show how the results change just by changing the
levels of deadweight losses, but authors have suggested other reasons why the
"law of $1/n$" may not apply, such as bicameralism [@chen2007law], popular
initiatives [@matsusaka2005endogeneity], type of government
[@coate2011government], supermajorities [@lee2015supermajority], political
fragmentation [@lledo2003electoral], and ideology [@bjedov2014impact].

# Search Criteria

The first step in our systematic review consisted in gathering a study sample.
We started our data collection with a manual search based on a set of keywords
we scouted from the distributive politics literature. This search produced a
database with many entries that were unrelated to our subject of investigation.
To reduce the number of false positives in our sample, we restricted our search
to studies that cited Weingast, Shepsle and Johnsen's 1981 paper "_The
Political Economy of Benefits and Costs: A Neoclassical Approach to
Distributive Politics_", which is the fundamental contribution to the field.
Although \href{https://scholar.google.com/}{Google Scholar} reports the article
has received
\href{https://scholar.google.com/scholar?um=1&ie=UTF-8&lr&cites=13117579863846712459}{2,180}
citations, our search resulted in 2,664 records on the 21^st^ of November 2019.

We webscraped three large academic databases:
\href{https://scholar.google.com/}{Google Scholar} (n = 1001);
\href{https://academic.microsoft.com/home}{Microsoft Academic} (n = 927); and
\href{https://www.scopus.com/}{Scopus} (n = 736). The `R` script we wrote
extracted the article title, abstract, authors, year, journal of publication,
and database from which the record originated. Our code is in the `R` script
available in this repository. We then filtered these results and selected only
articles that were written in English. While we included unpublished papers in
our search, we excluded book chapters and doctoral theses from our sample.

We complemented this process by doing a term search on Google Scholar after
having finished the entire eligibility procedure in the first sample. We
formulated the search string based on the terms and expressions that appeared
most frequently in the articles included in our meta-analysis. The choice of
string translates the central point of our inquiry: the relationship between
legislature size and public spending. The search string was as follows:
`("upper chamber size" OR "lower chamber size" OR "council size" OR "parliament
size" OR "legislature size" OR "number of legislators" OR "legislative size")
AND ("spending" OR "expenditure" OR "government size")`. We scraped Google
Scholar on March 5^th^, 2021. Using the titles of this new database's records,
we fuzzy-matched those articles with the database of @weingast1981political
citations mentioned previously to check for duplicates. We then performed the
exact same eligibility procedures, further explained below. This resulted in
3,041 additional records. Combining the two search strategies, we assessed a
total of 5,705 records.

# Article Selection

The selection process was conducted by two authors in three phases. In the
first phase, we excluded all titles that were clearly unrelated to our topic of
interest. This was only a preliminary step, as we were not able to eliminate a
large number of entries. Then, we read all abstracts. We kept all publications
whose main topics were either government expenditure or legislative structures.
Abstracts that indicated that the paper discussed or estimated the impacts of
representative institutions, elections, or chamber dynamics were included. This
allowed us to significantly reduce our sample.

In the second phase, we assessed full texts. To remain in our sample, the paper
should (i) conduct a quantitative analysis, (ii) report data on the number of
legislators, and (iii) also include data on public expenditure. If the
publication had all three, we kept it in our sample. Disagreements in this
phase were discussed among the authors, and a third investigator was consulted
when needed.

The third phase consisted of filling out tables for each of the remaining
articles to systematically evaluate their eligibility. Since authors use
different measures for government spending and the number of lower/upper house
members, we extracted all coefficients that provided this information. In this
phase, we also collected information on whether the paper had been published,
and if it explicitly discussed the "law of $1/n$". Upon choosing the variables,
we excluded two studies from the first sample, as they did not have the
dependent/independent variables we collected for our meta-analysis. We then
included the 2 papers we found in the second search, as both conformed to our
criteria, and compiled our final sample of `r length(unique(dat$authoryear))`
articles.

## Exclusion Analysis

We selected the final pool of articles based on two criteria, namely the independent and the dependent variables employed in the paper. The categories follow below:

1.  Independent variables:

-   *N*: Lower Chamber Size
-   *logN*: Log Lower Chamber Size
-   *K*: Upper Chamber Size

2.  Dependent variables:

-   *ExpPC*: Expenditure Per Capita
-   *logExpPC*: Log Expenditure Per Capita
-   *PCTGDP*: Expenditure as Percentage of GDP

If a paper did not use a combination of these variables, we excluded it from the meta-analysis.

## Flow Chart

The diagram below shows each step of our article selection process. We followed the Preferred Reporting Items for Systematic Reviews and Meta-Analyses (PRISMA) statement to conduct our study\footnote{More information about the PRISMA statement is available at \url{http://www.prisma-statement.org}.}. The column to the right depicts the number of articles excluded in each phase, and the one to the left shows the number of records evaluated.

\bigskip

```{tikz tikz-ex, echo=FALSE, cache=TRUE, eval=TRUE, engine.opts = list(template = "tikz2pdf.tex"), fig.height=2, fig.cap = "PRISMA Flow Chart for First Search Strategy: Weingast et al. Citations"}
% PRISMA flow chart for the first search strategy (records citing Weingast et al.).
% Layout: a rotated stage label on the far left, the retained records in the
% middle column, and the records excluded at that step in the right column.
% NOTE(review): the node styles stage, stage2, phase, exc and arrow are not
% defined in this chunk; presumably they come from the tikz2pdf.tex template.
\usetikzlibrary{shapes.arrows,arrows.meta,positioning,shapes.geometric}
\scalebox{0.9}{
  \begin{tikzpicture} [node distance = 2.7cm, xshift = 1.5]
  \footnotesize
  \linespread{1.0}

  % Stage labels (left column), stacked top to bottom.
  \node (ID) [stage, rotate = 90, yshift = 2cm, xshift = 2.5cm] {\normalsize{Identification}};

  \node (SC) [stage, below of = ID, rotate = 90] {\normalsize{Screening}};

  \node (EL) [stage, below of = SC, rotate = 90] {\normalsize{Eligibility}};

  \node (INC) [stage2, below of = EL, rotate = 90, xshift = -1.2cm] {\normalsize{Included}};

  % Retained records (middle) paired with the exclusions made at each step (right).
  \node (id) [phase, right of = ID, xshift = 1cm] {Records identified through webscraping  \\ \textbf{(n = 2664)}};

  % NOTE(review): 2664 - 1220 = 1444, but the next box reports 1445 screened
  % records; confirm which of the two counts is off by one.
  \node (1st) [exc, right of = id, xshift = 3cm] {Records in languages other than English, book chapters, doctoral theses, and duplicates excluded \\ \textbf{(n = 1220)}};

  \node (screen) [phase, below of = id] {Records screened \\ \textbf{(n = 1445)}};

  \node (2nd) [exc, right of = screen, xshift = 3cm] {Records excluded after reading title and abstract \\ \textbf{(n = 1069)}};

  \node (elig) [phase, below of = screen] {Full-text articles assessed for Eligibility \\ \textbf{(n = 376)}};

  \node (3rd) [exc, right of = elig, xshift = 3cm] {Non-quantitative studies or records using unrelated variables excluded \\ \textbf{(n = 329)}};

  \node (inc_1) [phase, below of = elig] {Preliminary included articles \\ \textbf{(n = 47)}};

  \node (4th) [exc, right of = inc_1, xshift = 3cm] {Articles excluded during analysis due to nonconforming variables \\ \textbf{(n = 19)}};

\node (inc_2) [phase, below of = inc_1] {Included articles \\ \textbf{(n = 28)}};
  % Arrows: each retained box points right to its exclusions and down to the next step.
  \draw [arrow] (id) -- (1st);
  \draw [arrow] (id) -- (screen);
  \draw [arrow] (screen) -- (2nd);
  \draw [arrow] (screen) -- (elig);
  \draw [arrow] (elig) -- (3rd);
  \draw [arrow] (elig) -- (inc_1);
  \draw [arrow] (inc_1) -- (4th);
  \draw [arrow] (inc_1) -- (inc_2);
 \end{tikzpicture}
}
```

```{tikz tikz-ex2, echo=FALSE, cache=TRUE, eval=TRUE, engine.opts = list(template = "tikz2pdf.tex"), fig.cap="PRISMA Flow Chart for Second Search Strategy: Search String with Terms"}
% PRISMA flow chart for the second search strategy (keyword search string).
% Layout: a rotated stage label on the far left, the retained records in the
% middle column, and the records excluded at that step in the right column.
% Each retained count equals the count above it minus the exclusions beside it:
% 3041 - 2603 = 438, 438 - 339 = 99, 99 - 86 = 13, 13 - 11 = 2.
% NOTE(review): the node styles stage, stage2, phase, exc and arrow are not
% defined in this chunk; presumably they come from the tikz2pdf.tex template.
\usetikzlibrary{shapes.arrows,arrows.meta,positioning,shapes.geometric}
\scalebox{0.9}{
  \begin{tikzpicture} [node distance = 2.7cm, xshift = 1.5]
  \footnotesize
  \linespread{1.0}
  % Stage labels (left column), stacked top to bottom.
  \node (ID) [stage, rotate = 90, yshift = 2cm, xshift = 2.5cm] {\normalsize{Screening 1}};
  \node (SC) [stage, below of = ID, rotate = 90] {\normalsize{Screening 2}};
  \node (EL) [stage, below of = SC, rotate = 90] {\normalsize{Eligibility}};
  \node (INC) [stage2, below of = EL, rotate = 90, xshift = -1.2cm] {\normalsize{Included}};
  % Retained records (middle) paired with the exclusions made at each step (right).
  \node (id) [phase, right of = ID, xshift = 1cm] {Clean records identified through webscraping  \\ \textbf{(n = 3041)}};
  \node (1st) [exc, right of = id, xshift = 3cm] {Records excluded after reading title, or in languages other than English, book chapters, doctoral theses, and duplicates \\ \textbf{(n = 2603)}};
  % 438 (not 483): the only value consistent with both neighbouring steps.
  \node (screen) [phase, below of = id] {Abstracts assessed for Eligibility \\ \textbf{(n = 438)}};
  \node (2nd) [exc, right of = screen, xshift = 3cm] {Records excluded after reading abstract \\ \textbf{(n = 339)}};
  \node (elig) [phase, below of = screen] {Full-text articles assessed for Eligibility \\ \textbf{(n = 99)}};
  \node (3rd) [exc, right of = elig, xshift = 3cm] {Non-quantitative studies or records using unrelated variables excluded \\ \textbf{(n = 86)}};
  \node (inc_1) [phase, below of = elig] {Preliminary included articles \\ \textbf{(n = 13)}};
  \node (4th) [exc, right of = inc_1, xshift = 3cm] {Articles excluded during analysis due to nonconforming variables \\ \textbf{(n = 11)}};
\node (inc_2) [phase, below of = inc_1] {Included articles \\ \textbf{(n = 2)}};
  % Arrows: each retained box points right to its exclusions and down to the next step.
  \draw [arrow] (id) -- (1st);
  \draw [arrow] (id) -- (screen);
  \draw [arrow] (screen) -- (2nd);
  \draw [arrow] (screen) -- (elig);
  \draw [arrow] (elig) -- (3rd);
  \draw [arrow] (elig) -- (inc_1);
  \draw [arrow] (inc_1) -- (4th);
  \draw [arrow] (inc_1) -- (inc_2);
 \end{tikzpicture}
}
```

# Meta-Analysis Dataset

Our meta-analytic data comprise two datasets. The first dataset has the
main coefficients reported in the selected studies. These data include only the
most rigorous model from each paper, that is, those estimated with the largest
sample size, most control variables, and fixed effects if the authors added
them. If the article employed a regression discontinuity design, we chose the
coefficient from the optimal bandwidth or from the intermediate one. This
sample encompasses `r dim(dat)[1]` estimates, as
`r as.numeric(table(table(dat$id))[2])` articles analysed two dependent or
independent variables of interest. Our second sample, in contrast, contains all
the `r dim(fulldat)[1]` effect sizes reported in the `r length(unique(dat$id))`
papers.

In the main text, we focus on the results for our restricted sample as we
consider them more robust, but the findings are very similar when we use the
extended dataset. Here we present the results of all tests performed in both
reduced and full samples.

# Descriptive Statistics

Here we show the descriptive statistics for our sample. We focus on the
following paper characteristics: study year; paper publication; the electoral
system mentioned in the publication; and the distribution of the dependent and
independent variables of interest.

## Study Year

The average year of publication in our sample is `r round(mean(dat$year), 2)`
with standard deviation of `r round(sd(dat$year), 2)`. The oldest study
included in the paper is from `r min(dat$year)`, while the most recent paper
was written in `r max(dat$year)`. Therefore, we cover
`r max(dat$year)-min(dat$year)` years of tests of the "law of $1/n$".

```{r, fig.width=7, fig.height=3, fig.cap="Study Year Frequencies", cache=TRUE, size="footnotesize", echo = FALSE}
# Bar chart of papers per year. Only the distinct (id, year) pairs are kept,
# so a paper contributing several estimates is counted once.
ggplot(
  data = unique(select(dat, id, year)),
  mapping = aes(x = as.factor(year))
) +
  geom_bar(colour = "black") +
  labs(x = "", y = "") +
  theme_bw()
```

## Frequency of Published Papers

Studies were included in our sample regardless of their publication status.
From the `r length(unique(dat$id))` papers in the sample,
`r as.numeric(table(unique(select(dat, id, published))$published)[2])` were
published while `r as.numeric(table(unique(select(dat, id, published))$published)[1])`
were not published.

```{r, fig.width=4, fig.height=2.5, fig.cap="Was the study published?", size="footnotesize", cache=TRUE, echo = FALSE}
# Bar chart of papers by publication status. Only the distinct
# (id, published) pairs are kept, so each paper is counted once.
ggplot(
  data = unique(select(dat, id, published)),
  mapping = aes(x = as.factor(published))
) +
  geom_bar(colour = "black") +
  labs(x = "", y = "") +
  theme_bw()
```

## Electoral System

Our sample differs considerably with regard to research design. One remarkable
difference is that several authors apply the logic of the "law of $1/n$", which
was built with majoritarian systems in mind, to non-majoritarian democracies.
In the sample, `r as.numeric(table(unique(select(dat, id, elecsys2))$elecsys2)[1])`
of the papers study *Majoritarian* systems while
`r as.numeric(table(unique(select(dat, id, elecsys2))$elecsys2)[2])` study
*Non-Majoritarian* electoral systems\footnote{Note that for the "law of
$1/n$" to be valid in a non-majoritarian system, we need to assume that despite
the fact that politicians are able to campaign in every place in the district,
votes are geographically concentrated. This concentration makes it easier for
politicians to use pork-barrel projects to win over their electoral
supporters.}.

```{r, fig.width=6, fig.height=3, fig.cap="Electoral Systems", size="footnotesize", cache=TRUE, echo = FALSE}
# Bar chart of papers by electoral system. Only the distinct
# (id, elecsys2) pairs are kept, so each paper is counted once per system.
ggplot(
  data = unique(select(dat, id, elecsys2)),
  mapping = aes(x = as.factor(elecsys2))
) +
  geom_bar(colour = "black") +
  labs(x = "", y = "") +
  theme_bw()
```

## Dependent Variables

The dependent variables included in the paper are:

-   `r as.numeric(table(unique(select(dat, id, depvar2))$depvar2)[1])` Expenditure Per Capita papers
-   `r as.numeric(table(unique(select(dat, id, depvar2))$depvar2)[2])` Expenditure as a Percentage of the GDP papers
-   `r as.numeric(table(unique(select(dat, id, depvar2))$depvar2)[3])` Log of Expenditure Per Capita papers

```{r, fig.width=6, fig.height=2.3, fig.cap="Dependent variables across the law of 1/n studies", size="footnotesize", cache=TRUE, echo = FALSE}
# Horizontal bar chart of the dependent variables, one count per distinct
# (id, depvar2) pair.
# NOTE(review): the labels are attached to the levels of depvar2 by position,
# so the order of this vector must match the level order of depvar2 -- confirm.
unique(select(dat, id, depvar2)) %>%
  mutate(
    depvar2 = factor(
      depvar2,
      labels = c(
        "Expenditure Per Capita",
        "Expenditure as Percentage GDP",
        "Log Expenditure Per Capita"
      )
    )
  ) %>%
  ggplot(mapping = aes(x = depvar2)) +
  geom_bar(colour = "black") +
  labs(x = "", y = "") +
  coord_flip() +
  theme_bw()
```

## Independent Variables

Most papers in our sample analyse the number of legislators in the lower
chamber (`r as.numeric(table(dat$indepvar2)[3])`). The second most frequent
independent variable is the number of legislators in the upper chamber
(`r as.numeric(table(dat$indepvar2)[1])`). Finally, the minority of papers use the
natural log of the number of legislators in the lower chamber as an independent
variable (`r as.numeric(table(dat$indepvar2)[2])`). As we noted above, some
papers had multiple coefficients, and thus the total number of coefficients is
`r dim(dat)[1]`, while the number of papers is only `r length(unique(dat$id))`.

```{r, fig.width=6, fig.height=3, fig.cap="Independent variables across the law of 1/n studies", size="footnotesize", echo = FALSE, cache=TRUE}
# Horizontal bar chart of the independent variables, one count per distinct
# (id, indepvar2) pair.
# The levels are spelled out next to their labels so that each code is always
# paired with the right label. Without `levels`, factor() sorts the codes with
# the session's collation rules and attaches the labels by position, which can
# mislabel "N" and "logN" depending on the locale.
dat %>%
  select(id, indepvar2) %>%
  unique() %>%
  mutate(indepvar2 = factor(indepvar2,
                            levels = c("K", "logN", "N"),
                            labels = c("Upper Chamber Size",
                                       "Log of Lower Chamber Size",
                                       "Lower Chamber Size"))) %>%
  ggplot(aes(x = indepvar2)) +
  geom_bar(color = "black") +
  labs(x = "",
       y = "") +
  coord_flip() +
  theme_bw()
```

## Histogram of the Coefficients and the Standard Errors

The coefficients in the papers vary considerably. We plot histograms of the
coefficients and standard errors for all measurements included in the
meta-analytic dataset. Most coefficients and standard errors are close to zero.

Coefficients:

```{r, size="footnotesize", cache=TRUE, fig.width=5, fig.height=3.3, echo = FALSE}
# Histogram of the reported coefficients, using 15 bins.
ggplot(data = dat, mapping = aes(x = coef)) +
  geom_histogram(colour = "black", bins = 15) +
  labs(x = "Coefficients", y = "") +
  theme_bw()
```

Standard errors:

```{r, size="footnotesize", cache=TRUE, fig.width=5, fig.height=3.3, echo = FALSE}
# Histogram of the reported standard errors, using 10 bins.
ggplot(data = dat, mapping = aes(x = SE)) +
  geom_histogram(colour = "black", bins = 10) +
  labs(x = "Standard Errors", y = "") +
  theme_bw()
```

## Sign Coefficients

One simple statistic that we can compute to assess the validity of the "law of
$1/n$" is the frequency of positive and negative estimates in the study sample.
Below we plot the frequency for all papers included in the meta-analytic
dataset.

```{r, size="footnotesize", fig.width=5, fig.height=4, fig.cap="Coefficient Sign", cache=TRUE, echo = FALSE}
# Bar chart of coefficient signs across all estimates in the main sample.
ggplot(data = dat, mapping = aes(x = as.factor(scoef))) +
  geom_bar(colour = "black") +
  labs(x = "", y = "") +
  theme_bw()
```

# Descriptive Statistics of Moderators

We chose a set of moderators that frequently appear in the literature and may
help us interpret our results. We included them in our meta-regressions
alongside an indicator for the type of independent variable used in the
original study: lower chamber size; natural logarithm of lower chamber size; or
upper chamber size. The additional moderators are: publication year; whether
the paper was published in an academic journal; the estimation method used in
the paper; the institutional design in terms of the division of legislative
power; and the electoral system.

```{r, warning=F, message=F, echo = FALSE, cache=TRUE, results='asis'}
# Descriptive table of the moderators, split by whether a coefficient belongs
# to the main sample.

# Readable version of the sample indicator. The labels are attached by level
# position.
# NOTE(review): this assumes the first level of `usemeta` marks coefficients
# outside the main sample -- confirm against the data.
fulldat$usemeta2 <- factor(fulldat$usemeta)
levels(fulldat$usemeta2) <- c("Other Coefficients", "Main Sample")

# Keep the moderators, rename them for display, and put them in table order.
aux <- select(fulldat,
              usemeta2,
              `Independent Variables` = indepvar2,
              `Year`                  = year,
              `Published work`        = published,
              `Estimation method`     = method,
              `Institutional Design`  = instdesign,
              `Electoral system`      = elecsys2)

# Replace the short codes with the labels shown in the table.
aux$`Independent Variables` <- recode(aux$`Independent Variables`,
                                      `N` = "Lower Chamber Size",
                                      `K` = "Upper Chamber Size",
                                      `logN` = "Log of Lower Chamber Size")
aux$`Electoral system` <- recode(aux$`Electoral system`,
                                 `Non-Maj` = "Non-Majoritarian",
                                 `Maj` = "Majoritarian")

# Summarise every column except the grouping variable, by sample.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
aux3 <- descrTable(~.-usemeta2,
                   aux, y = aux$usemeta2,
                   show.p.overall = FALSE,
                   show.all = TRUE)
export2md2(aux3,
           caption = "Descriptive Statistics of Moderators",
           format  = "latex")


```

# Binomial Tests for Coefficient Signs

The "law of $1/n$" posits that we should expect a positive influence of
legislature size on public expenditures. A general test of the theory could
investigate whether the papers find a higher frequency of positive coefficients
in their estimations. In statistical terms, consider a random variable
representing the coefficient sign for the papers. As the sign reported by each
paper is a Bernoulli trial, the number of positive signs across all papers
follows a Binomial distribution with parameters $n$, the number of papers, and
$p$, the probability of a positive sign. The "law of $1/n$" can be reformulated
as the claim that $p>0.5$, which facilitates the testing of the theory. The null statistical
hypothesis for such a test is that:

- $H_0$: the proportions of positive and negative signs are equal
  ($p=0.5$).

We take an agnostic approach and acknowledge that either the "law of $1/n$"
($p>0.5$), or the "reverse law of $1/n$" ($p<0.5$) may be true. In this case,
the alternative hypothesis is $p \neq 0.5$. To perform this test, we run
binomial exact tests in `R`, using the function `binom.test(.)`.

This test has two advantages. First, the binomial test ignores the design
discrepancies and focuses on the overall reported effect. This is an important
feature as papers analyse different countries, samples, and have distinct
characteristics, such as whether they were published or not. Second, this test
has the advantages of requiring few assumptions and being easy to interpret.
The disadvantage is that the test is not as informative as the
meta-regressions, as we shall see in the next sections.

For the lower chamber size, the results follow below.

```{r, size="footnotesize", echo = FALSE, cache=TRUE}
# Exact binomial test of H0: p = 0.5 on the coefficient signs for lower
# chamber size (N). `aux` and `aux2` are reused by the inline text that follows.
aux <- dat %>%
  filter(indepvar2 == "N")
# The second cell of table(scoef) is used as the number of successes.
# NOTE(review): presumably that cell is the count of positive signs -- confirm.
aux2 <- binom.test(
  x = table(aux$scoef)[2],
  n = sum(table(aux$scoef)),
  p = 0.5
)
# Drop columns 2, 4, 5 and 6 of the tidied test, then reorder what is left.
aux2 %>%
  tidy() %>%
  .[, -c(2, 4, 5, 6)] %>%
  .[, c(3, 4, 1, 2)] %>%
  pander()
```

Under the null hypothesis of $p=0.5$, we find that
`r as.numeric(table(aux$scoef)[2])` out of `r sum(table(aux$scoef))` studies have
a positive sign. The probability that a distribution with $p=0.5$ generates a
sample at least this extreme is p-value = `r round(as.numeric(aux2$p.value), 3)`. Thus, we
`r ifelse(as.numeric(aux2$p.value)<0.1, 'accept', 'reject')` the hypothesis that
$p \neq 0.5$.

For the log of lower chamber size, the results are as follows:

```{r, size="footnotesize", echo = FALSE, cache=TRUE}
# Exact binomial test of H0: p = 0.5 on the coefficient signs for the log of
# lower chamber size (logN). `aux` and `aux2` are reused by the inline text
# that follows.
aux <- dat %>%
  filter(indepvar2 == "logN")
# The second cell of table(scoef) is used as the number of successes.
# NOTE(review): presumably that cell is the count of positive signs -- confirm.
aux2 <- binom.test(
  x = table(aux$scoef)[2],
  n = sum(table(aux$scoef)),
  p = 0.5
)
# Drop columns 2, 4, 5 and 6 of the tidied test, then reorder what is left.
aux2 %>%
  tidy() %>%
  .[, -c(2, 4, 5, 6)] %>%
  .[, c(3, 4, 1, 2)] %>%
  pander()
```

Out of `r sum(table(aux$scoef))` studies, `r as.numeric(table(aux$scoef)[2])`
have a positive sign. The probability that a distribution with $p=0.5$ generates a
sample at least this extreme is p-value = `r round(as.numeric(aux2$p.value), 3)`. So we
`r ifelse(as.numeric(aux2$p.value)<0.1, 'accept', 'reject')` the hypothesis that
$p \neq 0.5$.

Finally, for upper chamber size, the results are:

```{r, size="footnotesize", echo = FALSE, cache=TRUE}
# Exact binomial test of H0: p = 0.5 on the coefficient signs for upper
# chamber size (K). `aux` and `aux2` are reused by the inline text that follows.
aux <- dat %>%
  filter(indepvar2 == "K")
# The second cell of table(scoef) is used as the number of successes.
# NOTE(review): presumably that cell is the count of positive signs -- confirm.
aux2 <- binom.test(
  x = table(aux$scoef)[2],
  n = sum(table(aux$scoef)),
  p = 0.5
)
# Drop columns 2, 4, 5 and 6 of the tidied test, then reorder what is left.
aux2 %>%
  tidy() %>%
  .[, -c(2, 4, 5, 6)] %>%
  .[, c(3, 4, 1, 2)] %>%
  pander()
```

Here we see that `r as.numeric(table(aux$scoef)[2])` out of
`r sum(table(aux$scoef))` studies have a positive sign. The p-value for this test
is `r round(as.numeric(aux2$p.value), 3)`. Therefore, we
`r ifelse(as.numeric(aux2$p.value)<0.1, 'accept', 'reject')` the hypothesis that
$p \neq 0.5$. This is the only test that presents evidence of an association
between the legislature size and expenditure.

We also tested the possibility that the signs change depending on whether the
study analyses unicameral or non-unicameral legislative bodies. This analysis
does not take into consideration the size of the upper house, as it would
indicate that the cases in the paper have a non-unicameral legislature. The
results for this analysis follow below. They show that there is no significant
change in the sign of the relationship between public spending and legislature
size when controlling for institutional design.

```{r, size="footnotesize", echo = FALSE, cache=TRUE}
# Sign tests for the lower-chamber coefficients (N and logN), run separately
# for unicameral and non-unicameral designs.

# The four subsets, in the order used for the row labels below.
aux <- list(
  filter(dat, indepvar2 == "N", instdesign2 == "Unicameral"),
  filter(dat, indepvar2 == "N", instdesign2 != "Unicameral"),
  filter(dat, indepvar2 == "logN", instdesign2 == "Unicameral"),
  filter(dat, indepvar2 == "logN", instdesign2 != "Unicameral")
)

# Replace each subset with its exact binomial test of H0: p = 0.5. The second
# cell of table(scoef) is used as the number of successes.
aux <- lapply(aux, function(subset_dat) {
  binom.test(table(subset_dat$scoef)[2],
             sum(table(subset_dat$scoef)), p = 0.5)
})

# Stack the tidied tests, one row per subset, keeping the same columns and
# column order as the single-test tables above.
aux2 <- bind_rows(lapply(aux, function(test_result) {
  tidy(test_result)[, -c(2, 4, 5, 6)][, c(3, 4, 1, 2)]
}))

# Add the row labels in front, then drop the method and alternative columns.
aux2 <- aux2 %>%
  mutate(
    `Indep. Variable` = c("Lower House Size", "Lower House Size",
                          "Log of Lower House Size", "Log of Lower House Size"),
    `Legislative Inst.` = c("Unicameral", "Non-unicameral",
                            "Unicameral", "Non-unicameral")
  ) %>%
  relocate(`Indep. Variable`, `Legislative Inst.`, .before = method) %>%
  select(-method, -alternative)
pander(aux2)
```

# Meta-Analysis (Main Sample)

## Estimation Method

In general terms, there are two main ways to conduct a meta-analysis. Scholars
either use fixed effects or random effects models. The fixed effects model
assumes that there exists a single true effect and all estimates are an attempt
to uncover this effect. The random effects model, in contrast, assumes that
there is a distribution of true effects, and that the coefficients vary based
on sampling and tests characteristics.

In this paper, we employ a random effects model. The empirical papers testing
the *law of 1/n* are very diverse. We tried to capture some of this diversity
by considering the main dependent and independent variables separately, but
they have at least four other important sources of dispersion:

1.  **Study sample**: Counties, Municipalities, States, Provinces, Countries.
2.  **Electoral systems**: Majoritarian, PR, Mixed.
3.  **Modelling strategies**: Panel data, Standard OLS, IV, RDD.
4.  **Institutional design**: Unicameral, Bicameral, or a Mix of both Unicameral and Bicameral.

These sources of heterogeneity have two implications. First, they make our
estimates notably dispersed. Second, the amount of heterogeneity makes fixed
effects estimates unrealistic and biased. Thus, we opt for the random effects
model.

Assume that each study has an effect of $T_i$. In a random effects model, we
can decompose this effect into two components, the true effect that the study
with the same specifications as $i$ comes from, $\theta_i$, and a within-study
error $\varepsilon_i$:

$$
T_i \ = \ \theta_i + \varepsilon_i
$$

And the random effects model assumes that the $\theta_i$ varies from study to
study, having a true parameter $\mu$, plus a between-study error, $\xi_i$:

$$
T_i \ = \ \mu + \xi_i + \varepsilon_i
$$

And the random effects model estimates the parameter $\mu$, under the challenge
of estimating both the within-and-between-study sampling errors.

Another crucial assumption in meta-analysis is that the coefficients should be
independent [@harrer2019doing; @cheung2019guide; @veroniki2016methods;
@borenstein2011introduction]. This assumption states that for our findings to
be consistent, the coefficients must come from different sources of variation.
However, in the political economy literature, authors frequently use similar
datasets, and almost all papers fit more than one model with similar variables.
While our restricted dataset contains `r nrow(dat)` estimates, our full dataset
has `r nrow(fulldat)` coefficients. This is because the papers report an
average of `r round(mean(as.numeric(table(fulldat$id))), 2)` coefficients. To
correct for the violation of the study independence assumption, we use a
multilevel random effects model [@cheung2014modeling]. We add two extra levels
to the regular random effects model, one that indicates the publication ID and
another that indicates the data used in the original article. These levels are
assumed to remove dependence structures in the data, therefore improving the
estimates of our coefficient of interest, which is the effect of legislature
size on public spending.

There are two levels in the main models. First, we build a common index for
papers that share the same data specifications. The papers with common indexing
are:

| Publication ID                              | Source of Dependence                                                     |
|---------------------------------------------|--------------------------------------------------------------------------|
| 3, 42, 132, 165, 439, 441, 467, 505         | US States Data                                                           |
| 408, 208, A258                              | US Municipalities Data                                                   |
| 849, 578                                    | US Municipalities Data and Same Author in Two Different Studies          |

All of the remaining papers received a unique index. As the number of papers
with these dependencies is not very high, and many characteristics within
papers vary (e.g., state-wise ad-hoc exclusions, variations in start and end
points, region selection, etc.), we show that it makes little difference to
change from this index to the paper IDs. In regard to the full dataset, the
results change considerably, because authors fit several models within the same
paper. In any case, we use multilevel random effects for all the estimated
models in this paper.

To check the possibility of publication bias, we add a funnel plot and an
@egger1997bias test for distribution asymmetry for every model. Funnel plots
display the possibility of having a file-drawer effect, meaning that null
results are under-represented in our sample. In this type of plot, under the
assumption of no file-drawer effect, the coefficients are expected to lie
symmetrically around the mean observed outcome. If they are asymmetric, it
provides evidence for a file-drawer effect.

We use the `R` packages `meta`, `metafor`, and `dmetar` in all estimates
[@harrer2019doing]. We employ the *Restricted Maximum Likelihood Estimator* to
assess the variance of the true effect size ($\tau^2$), which in our
formulation represents the variance of $\xi_i$. The literature regards this
estimator as the most precise when analysing continuous measures, such as the
ones we have in our data [@veroniki2016methods].

We combine the three independent variables (Lower Chamber Size, Log of Lower
Chamber Size, and Upper Chamber Size) with our dependent variables of interest
(Expenditure Per Capita, Log of Expenditure Per Capita, Expenditure as a
Percentage of the GDP). This forms a $3 \times 3$ table, yet not all
combinations are available in the data. The results are shown below.

## Lower Chamber Size and Expenditure Per Capita

The results for the meta-analysis that compares lower chamber size and
expenditure per capita are available below.

```{r, cache=TRUE, echo=FALSE, size="footnotesize"}
# Pooled effect of lower chamber size (N) on expenditure per capita (ExpPC).
aux <- filter(dat, indepvar2 == "N", depvar2 == "ExpPC")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

The forest plot:

```{r, fig.width=8, fig.height=4.5, fig.cap="Effect of Lower Chamber Size on Expenditure Per Capita", warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for `mod`, the model fitted in the preceding chunk.
build_forest(mod, NULL)
```

```{r, include=FALSE, warning=FALSE, cache=TRUE, echo=FALSE}
# Keep a titled copy of this forest plot in `f1`; it is one of the panels
# arranged into graph1.pdf later in the document.
f1 <- build_forest(
  mod, NULL,
  lsize = 15,
  ttl = "1.1 - Lower Chamber Size\nand Expenditure Per Capita"
)
```

And to assess the possibility of publication bias, we add the funnel plot below:

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Lower Chamber Size on Expenditure Per Capita", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

### Electoral System Subgroup Analysis

The *law of 1/n* was introduced to describe majoritarian systems, but the theory has also been applied to non-majoritarian electoral systems. We estimated a subgroup analysis using a binary indicator for electoral system. The results may be seen below.

We find little evidence that either majoritarian or non-majoritarian systems produce systematically positive effects on expenditure per capita. Neither coefficient is statistically significant, which reassures us that the absence of effect is not caused by pooling multiple types of electoral systems.

```{r, fig.width=8, fig.height=6, fig.cap="Subgroup Analysis of Lower Chamber Size x Expenditure Per Capita, Controlling by Electoral System", warning=FALSE, cache=TRUE, echo=FALSE}
# Subgroup forest plot of the current `aux` subset, split by `elecsys2`.
build_forest_het(
  aux, aux$coef, aux$VAR,
  hetvar = aux$elecsys2,
  slab = aux$authoryear,
  capt = NULL
)
```

### Institutional Design Subgroup Analysis

The *law of 1/n* was conceived to explain the expected effect of chamber size on public expenditure. However, papers such as @chen2007law demonstrated that the relationship is mediated by the size of the upper chamber. Their finding suggests that the institutional design of the legislature influences the empirical predictions in the sample we study. To test this possibility, we estimated a subgroup analysis using a binary indicator for institutional design. The results may be seen below.

We find little evidence that either unicameral or non-unicameral legislative institutions produce systematically positive effects on expenditure per capita. However, the coefficients tend to be weakly positive for unicameral studies, suggesting that there might be a relationship that is masked by the low statistical power.

```{r, fig.width=8, fig.height=6, fig.cap="Subgroup Analysis of Lower Chamber Size x Expenditure Per Capita, Controlling by Institutional Design", warning=FALSE, cache=TRUE, echo=FALSE}
# Subgroup forest plot of the current `aux` subset, split by `instdesign2`.
build_forest_het(
  aux, aux$coef, aux$VAR,
  hetvar = aux$instdesign2,
  slab = aux$authoryear,
  capt = NULL
)
```

## Log of Lower Chamber Size and Expenditure Per Capita

There are no studies that have per capita expenditure as the dependent variable
and log of lower chamber size as the independent variable.

## Upper House Size and Expenditure Per Capita

Now we look into the upper chamber size. In this model, we investigate the
effect of upper house size on expenditure per capita.

```{r, size="footnotesize", echo=FALSE, cache=TRUE}
# Pooled effect of upper chamber size (K) on expenditure per capita (ExpPC).
aux <- filter(dat, indepvar2 == "K", depvar2 == "ExpPC")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

```{r, fig.width=7, fig.height=3, fig.cap="Effect of Upper Chamber Size on Expenditure Per Capita", warning=FALSE, echo = FALSE}
# Forest plot for `mod`, the model fitted in the preceding chunk.
build_forest(mod, NULL)
```

We see no evidence of publication bias:

```{r, fig.width=4, fig.height=3.5, fig.cap="Funnel Plot -- Effect of Upper Chamber Size on Expenditure Per Capita", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```


```{r, include=FALSE, warning=FALSE, echo=FALSE}
# Keep a titled copy of this forest plot in `f2`; it is one of the panels
# arranged into graph1.pdf later in the document.
f2 <- build_forest(
  mod, NULL, 15,
  ttl = "1.6 - Upper Chamber Size\nand Expenditure Per Capita"
)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Lower Chamber Size and Log of Expenditure Per Capita

This model estimates the effect of lower chamber size on log of expenditure per
capita.

```{r, size="footnotesize", echo=FALSE, cache=TRUE}
# Pooled effect of lower chamber size (N) on log expenditure per capita
# (logExpPC).
aux <- filter(dat, indepvar2 == "N", depvar2 == "logExpPC")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

The forest plot is as follows:

```{r, fig.width=8, fig.height=3, fig.cap="Effect of Lower Chamber Size on Log of Expenditure Per Capita",warning=FALSE, echo = FALSE, cache=TRUE}
# Forest plot for `mod`, the model fitted in the preceding chunk.
build_forest(mod, NULL)
```

The funnel plot shows no evidence of publication bias.

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Lower Chamber Size on Log Expenditure Per Capita", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```


```{r, include=FALSE, echo=FALSE, warning=FALSE, cache=TRUE}
# Keep a titled copy of this forest plot in `f3`; it is one of the panels
# arranged into graph1.pdf later in the document.
f3 <- build_forest(
  mod, NULL, 15,
  ttl = "1.2 - Lower Chamber Size\nand Log Expenditure Per Capita"
)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Log of Lower Chamber Size and Log of Expenditure Per Capita

In this specification, we measure how changes in the log of lower chamber size
impact the log of per capita expenditure.

```{r, size="footnotesize", echo=FALSE, cache=TRUE}
# Pooled effect of log lower chamber size (logN) on log expenditure per capita
# (logExpPC).
aux <- filter(dat, indepvar2 == "logN", depvar2 == "logExpPC")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

The forest plot:

```{r, fig.width=6, fig.height=3, fig.cap="Effect of Log Lower Chamber Size on Log Expenditure Per Capita", echo = FALSE, warning=FALSE, cache=TRUE}
# Forest plot for `mod`, the model fitted in the preceding chunk.
build_forest(mod, NULL)
```

And the funnel plot:

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Log Lower Chamber Size on Log Expenditure Per Capita", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```


```{r, include=FALSE, echo=FALSE, warning=FALSE, cache=TRUE}
# Keep a titled copy of this forest plot in `f4`; it is one of the panels
# arranged into graph1.pdf later in the document.
f4 <- build_forest(
  mod, NULL, 15,
  ttl = "1.4 - Log Lower Chamber Size\nand Log Expenditure Per Capita"
)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Upper Chamber Size and Log of Expenditure Per Capita

No studies correlate the log of per capita expenditure with the size of the
upper chamber.

## Lower Chamber Size and Expenditure as Percentage of GDP

This model evaluates the relationship between lower house size and public
expenditure as a percentage of GDP.

```{r, size="footnotesize", echo=FALSE, cache=TRUE}
# Pooled effect of lower chamber size (N) on expenditure as a percentage of
# GDP (PCTGDP).
aux <- filter(dat, indepvar2 == "N", depvar2 == "PCTGDP")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

Below, you may find the forest plot:

```{r, fig.width=8, fig.height=3, fig.cap="Effect of Lower Chamber Size on Expenditure as Percentage of GDP", warning=FALSE, echo = FALSE, cache=TRUE}
# Forest plot for `mod`, the model fitted in the preceding chunk.
build_forest(mod, NULL)
```

The funnel plot to test for publication bias:

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Lower Chamber Size on Expenditure as Percentage of GDP", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```

```{r, include=FALSE, warning=FALSE, echo=FALSE, cache=TRUE}
# Keep a titled copy of this forest plot in `f5`; it is one of the panels
# arranged into graph1.pdf later in the document.
f5 <- build_forest(
  mod, NULL, 15,
  ttl = "1.3 - Lower Chamber Size\nand Expenditure as Percentage of GDP"
)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Log Lower Chamber Size and Expenditure as Percentage of GDP

This model uses public expenditure as a percentage of GDP as the dependent
variable and the log of lower chamber size as the independent variable.

```{r, echo=FALSE, size="footnotesize", cache=TRUE}
# Pooled effect of log lower chamber size (logN) on expenditure as a percentage
# of GDP (PCTGDP).
aux <- filter(dat, indepvar2 == "logN", depvar2 == "PCTGDP")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

```{r, echo = FALSE, fig.width=8, fig.height=3, fig.cap="Effect of Log Lower Chamber Size on Expenditure as Percentage of GDP", warning=FALSE, cache=TRUE}
# Forest plot for `mod`, the model fitted in the preceding chunk.
build_forest(mod, NULL)
```

Funnel plot:

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Log of Lower Chamber Size on Expenditure as Percentage of GDP", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```

```{r, include=FALSE, warning=FALSE, cache=TRUE, echo=FALSE}
# Keep a titled copy of this forest plot in `f6`; it is one of the panels
# arranged into graph1.pdf later in the document.
f6 <- build_forest(
  mod, NULL, 15,
  ttl = "1.5 - Log Lower Chamber Size\nand Expenditure as Percentage of GDP"
)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Upper Chamber Size and Expenditure as Percentage of GDP

This model measures the effect of upper chamber size on the public expenditure
share of the GDP.

```{r, size="footnotesize", cache=TRUE, echo=FALSE}
# Pooled effect of upper chamber size (K) on expenditure as a percentage of
# GDP (PCTGDP).
aux <- filter(dat, indepvar2 == "K", depvar2 == "PCTGDP")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

```{r, fig.width=8, fig.height=4, fig.cap="Effect of Upper Chamber Size on Expenditure as Percentage of GDP", warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for `mod`, the model fitted in the preceding chunk.
build_forest(mod, NULL)
```

As in our previous estimations, we find no evidence of file-drawer effect.

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Upper Chamber Size on Expenditure as Percentage of GDP", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```

```{r, include=FALSE, warning=FALSE, cache=TRUE, echo=FALSE}
# Keep a titled copy of this forest plot in `f7`; it is one of the panels
# arranged into graph1.pdf later in the document.
f7 <- build_forest(
  mod, NULL, 15,
  ttl = "1.7 - Upper Chamber Size\nand Expenditure as Percentage of GDP"
)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Lower Chamber Size and Expenditure per Capita (Instrumental Variables Only)

Here we estimate a meta-analysis of the papers which use instrumental
variables. The results may be seen below.

```{r, size="footnotesize", cache=TRUE, echo=FALSE}
# Pooled effect of lower chamber size (N) on expenditure per capita (ExpPC),
# restricted to estimates whose `method` is IV.
aux <- filter(
  dat,
  indepvar2 == "N",
  depvar2 == "ExpPC",
  method %in% "IV"
)

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

And the forest plot:

```{r, fig.width=8, fig.height=3, fig.cap="Effect of Lower Chamber Size on Expenditure Per Capita (IV Only)", warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for `mod`, the IV-only model fitted in the preceding chunk.
build_forest(mod, NULL)
```

No evidence of publication bias, as shown in the following funnel plot:

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Lower Chamber Size on Expenditure Per Capita (IV Only)", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```


```{r, include=FALSE, warning=FALSE, cache=TRUE, echo=FALSE}
# Build the estimation-technique subgroup plot (`f8`) that is later arranged
# into graph2.pdf.
#
# The plot needs every lower-chamber-size / expenditure-per-capita estimate,
# not only the IV ones, so that subset is stored under its own name.
# `aux` and `mod` must keep holding the IV-only data and model at this point:
# the "Highlights" list that follows reads `mod` through inline R, and
# overwriting it here made that list report the full-sample model inside the
# IV-only section. No model is refitted because build_forest_het() is only
# given the data columns.
aux_method <- dat %>%
  filter(indepvar2 == 'N',
         depvar2 == 'ExpPC')

f8 <- build_forest_het(aux_method, aux_method$coef,
                       aux_method$VAR, slab = aux_method$authoryear,
                       capt = NULL, lsize = 15,
                       ttl = 'Lower Chamber Size and Expenditure per Capita\n(Subgrouping by Estimation Technique)',
                       hetvar = aux_method$method)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

### Regression Method Subgroup Analysis

Over time, the literature evolved to use causally identified techniques to determine the effect of legislature size on the expenditure per capita. To study whether the method had an effect on the estimated coefficients, we fit a subgroup analysis using the method employed in each paper.

```{r, fig.width=8, fig.height=6, fig.cap="Subgroup Analysis of Lower Chamber Size x Expenditure Per Capita, Controlling by Estimation Method", warning=FALSE, cache=TRUE, echo=FALSE}
# All lower chamber size (N) / expenditure per capita (ExpPC) estimates,
# regardless of estimation method.
aux <- filter(dat, indepvar2 == "N", depvar2 == "ExpPC")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Subgroup forest plot of `aux`, split by the `method` column.
build_forest_het(
  aux, aux$coef, aux$VAR,
  hetvar = aux$method,
  slab = aux$authoryear,
  capt = NULL
)
```

Although all methods generate a null effect, the IV method seems to be well distributed, with two papers with positive effects and two papers displaying negative effects. The random effects model for the subgroup is 0.22, which is negative but non-significant. Improving the estimation technique, for the case of IVs, still renders a null effect of legislature size on per capita government expenditure.

## Lower Chamber Size and Log of Expenditure per Capita (Regression Discontinuity Design Only)

In this subsection, we run a meta-analysis with papers that include regression
discontinuity designs.

```{r, size="footnotesize", cache=TRUE, echo=FALSE}
# Pooled effect of lower chamber size (N) on log expenditure per capita
# (logExpPC), restricted to estimates whose `method` is RDD.
aux <- filter(
  dat,
  indepvar2 == "N",
  depvar2 == "logExpPC",
  method == "RDD"
)

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

Forest plot:

```{r, fig.width=8, fig.height=3, fig.cap="Effect of Lower Chamber Size on Log of Expenditure Per Capita (RDDs)", warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for `mod`, the RDD-only model fitted in the preceding chunk.
# That model uses depvar2 == 'logExpPC', so the caption names the log outcome.
build_forest(mod, NULL)
```

```{r, include=FALSE, warning=FALSE, cache=TRUE, echo=FALSE}
# Keep a titled copy of the RDD-only forest plot in `f9`; it is later arranged
# into graph2.pdf. `mod` still holds the RDD-only model fitted two chunks above
# (same filter, same rma.mv call), so it is reused here rather than refitted.
f9 <- build_forest(mod, capt = NULL,
                   lsize = 15, ttl = 'Lower Chamber Size and Log Expenditure per Capita\n(RDDs)')
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.
5. One notable absence in the analysis is the seminal paper by @petterssonlidbom2012size. This is because the author uses Log of Expenditure Per Capita x Log of Legislature Size.

```{r, warning=FALSE, cache=FALSE, echo=FALSE, results='hide'}
# Export the combined forest-plot panels (f1-f9, built in earlier chunks) as
# PDF files.
# cache=FALSE: this chunk exists only for its file side effects; a cached chunk
# is skipped on re-knit, which would leave the PDFs missing or stale.

# pdf() fails if the target directory is missing, so make sure it exists.
dir.create('../graphs', showWarnings = FALSE, recursive = TRUE)

## Plot 1
pdf('../graphs/graph1.pdf', width = 16, height = 11)
# print() explicitly so that drawing on the PDF device does not depend on
# top-level auto-printing.
print(ggarrange(f1, f3, f5, f4, f6, f2, f7, align = 'hv'))
dev.off()

## Plot 2
pdf('../graphs/graph2.pdf', width = 12, height = 6)
print(ggarrange(f8, f9, align = 'hv'))
dev.off()
```

# Meta-Analysis (Extended Sample)

## Lower Chamber Size and Expenditure Per Capita

Here we estimate the relationship between expenditure per capita as a dependent
variable, and the lower chamber as the independent variable.

```{r, size="footnotesize", warning=FALSE, cache=TRUE, echo=FALSE}
# Extended sample (`fulldat`): pooled effect of lower chamber size (N) on
# expenditure per capita (ExpPC).
aux <- filter(fulldat, indepvar2 == "N", depvar2 == "ExpPC")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

The forest plot:

```{r, fig.width=12, fig.height=11, fig.cap="Effect of Lower Chamber Size on Expenditure Per Capita", warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for `mod`, the extended-sample model fitted in the preceding chunk.
build_forest(mod, NULL)
```

And to assess the possibility of publication bias, we add the funnel plot below:

```{r, fig.width=4, fig.height=3, fig.cap="Funnel Plot -- Effect of Lower Chamber Size on Expenditure Per Capita", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

### Electoral System Subgroup Analysis

The *law of 1/n* was formulated to analyse the budgetary allocation in majoritarian systems. In the theoretical section below, we explain why the argument has potential issues when applied to non-majoritarian electoral systems. We estimated a subgroup analysis using a dummy variable indicating the electoral system included in each model.

We see that majoritarian systems do not have a clear positive effect on budgetary spending. The majoritarian systems in the sample had a random effects model estimate of -0.25, while the random effects model in the non-majoritarian subgroup fitted a value of 0.08. Neither is statistically significant, but they reassure us that the absence of effect is not caused by pooling multiple types of electoral systems.

```{r, fig.width=8, fig.height=12, fig.cap="Subgroup Analysis of Lower Chamber Size x Expenditure Per Capita, Controlling by Electoral System", warning=FALSE, cache=TRUE, echo=FALSE}
# Subgroup forest plot of the current `aux` subset, split by `elecsys2`.
build_forest_het(
  aux, aux$coef, aux$VAR,
  hetvar = aux$elecsys2,
  slab = aux$authoryear,
  capt = NULL
)
```

### Institutional Design Subgroup Analysis

The *law of 1/n* was conceived to explain the expected effect of chamber size on public expenditure. However, papers such as @chen2007law demonstrated that the relationship is mediated by the size of the upper chamber. Their finding suggests that the institutional design of the legislature influences the empirical predictions in the sample we study. To test this possibility, we estimated a subgroup analysis using a binary indicator for institutional design. The results may be seen below.

We find little evidence that either unicameral or non-unicameral legislative institutions produce systematically positive effects on expenditure per capita. However, the coefficients tend to be weakly positive for unicameral studies, suggesting that there might be a relationship that is masked by the low statistical power.

```{r, fig.width=8, fig.height=12, fig.cap="Subgroup Analysis of Lower Chamber Size and Expenditure Per Capita, Controlling by Institutional Design", warning=FALSE, cache=TRUE, echo=FALSE}
# Subgroup forest plot of the current `aux` subset, split by `instdesign2`.
build_forest_het(
  aux, aux$coef, aux$VAR,
  hetvar = aux$instdesign2,
  slab = aux$authoryear,
  capt = NULL
)
```

## Log of Lower Chamber Size and Expenditure Per Capita

There are no studies that have per capita expenditure as the dependent variable
and log of lower chamber size as the independent variable.

## Upper Chamber Size and Expenditure Per Capita

Now we investigate the effect of the upper chamber size on expenditure per
capita.

```{r, size="footnotesize", warning=FALSE, cache=TRUE, echo=FALSE}
# Extended sample (`fulldat`): pooled effect of upper chamber size (K) on
# expenditure per capita (ExpPC).
aux <- filter(fulldat, indepvar2 == "K", depvar2 == "ExpPC")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

The forest plot:

```{r, fig.width=9, fig.height=10, fig.cap="Effect of Upper Chamber Size on the Per Capita Government Expenditure",warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for `mod`, the extended-sample model fitted in the preceding chunk.
build_forest(mod, NULL)
```

The funnel plot suggests no publication bias.

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Upper Chamber Size on Expenditure Per Capita", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Lower Chamber Size and Log of Expenditure Per Capita

Here we test the relationship between log of per capita expenditure
and the number of lower chamber legislators.

```{r, size="footnotesize", warning=FALSE, cache=TRUE, echo=FALSE}
# Extended sample (`fulldat`): pooled effect of lower chamber size (N) on log
# expenditure per capita (logExpPC).
aux <- filter(fulldat, indepvar2 == "N", depvar2 == "logExpPC")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

The forest plot is shown below:

```{r, fig.width=8, fig.height=5, fig.cap="Effect of Lower Chamber Size on Log Expenditure Per Capita",warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for `mod`, the extended-sample model fitted in the preceding chunk.
build_forest(mod, NULL)
```

```{r, fig.width=4, fig.height=3, fig.cap="Funnel Plot -- Effect of Lower Chamber Size on Log Expenditure Per Capita", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Log of Lower Chamber Size and Log of Expenditure Per Capita

In this specification, we analyse log of per capita expenditure as a function
of the log of lower chamber size.

```{r, size="footnotesize", warning=FALSE, cache=TRUE, echo=FALSE}
# Extended sample (`fulldat`): pooled effect of log lower chamber size (logN)
# on log expenditure per capita (logExpPC).
aux <- filter(fulldat, indepvar2 == "logN", depvar2 == "logExpPC")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

The forest plot:

```{r, fig.width=8, fig.height=7, fig.cap="Effect of Log of Lower Chamber Size on Log of Expenditure Per Capita",warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for `mod`, the extended-sample model fitted in the preceding chunk.
build_forest(mod, NULL)
```

And the funnel plot:

```{r, fig.width=4, fig.height=3.5, fig.cap="Funnel Plot -- Effect of Log of Lower Chamber Size on Log of Expenditure Per Capita", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```


Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Upper Chamber Size and Log of Expenditure Per Capita

No studies correlate log of per capita expenditure with the size of upper
chamber.

## Lower Chamber Size and Expenditure as Percentage of GDP

Here we test how lower house size impacts public expenditures as a percentage of
GDP.

```{r, size="footnotesize", warning=FALSE, cache=TRUE, echo=FALSE}
# Extended sample (`fulldat`): pooled effect of lower chamber size (N) on
# expenditure as a percentage of GDP (PCTGDP).
aux <- filter(fulldat, indepvar2 == "N", depvar2 == "PCTGDP")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

Here is the forest plot:

```{r, fig.width=8, fig.height=7, fig.cap="Effect of Lower Chamber Size on Expenditure as Percentage of GDP",warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for `mod`, the extended-sample model fitted in the preceding chunk.
build_forest(mod, NULL)
```

And here is the funnel plot to assess the possibility of publication bias:

```{r, fig.width=4, fig.height=3.75, fig.cap="Funnel Plot -- Effect of Lower Chamber Size on Expenditure as Percentage of GDP", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot of `mod`, used in the text as a visual check for publication bias.
funnel(mod)
```


Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The @egger1997bias test `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Log of Lower Chamber Size and Expenditure as Percentage of GDP

This meta-regression assesses how public expenditure as a percentage of GDP
varies according to log of lower chamber size.

```{r, size="footnotesize", warning=FALSE, cache=TRUE, echo=FALSE}
# Extended sample (`fulldat`): pooled effect of log lower chamber size (logN)
# on expenditure as a percentage of GDP (PCTGDP).
aux <- filter(fulldat, indepvar2 == "logN", depvar2 == "PCTGDP")

# REML fit with random intercepts for id_level2 nested within id_level1.
# NOTE(review): tdist = TRUE looks redundant with test = "t" -- confirm.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  slab = paste(authoryear),
  method = "REML",
  test = "t",
  tdist = TRUE
)

# Print the model summary.
mod
```

The forest plot:

```{r, fig.width=8, fig.height=3, fig.cap="Effect of Log Lower Chamber Size on Expenditure as Percentage of GDP",warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for the current `mod`; build_forest() is a helper defined
# outside this section, called here with NULL as its second argument.
build_forest(mod, NULL)
```

And the funnel plot:

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Log Lower Chamber Size on Expenditure as Percentage of GDP", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot for the current `mod` (log lower chamber size vs. PCTGDP).
funnel(mod)
```


Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The rank correlation test for funnel plot asymmetry (a rank-based alternative to the @egger1997bias regression test) `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Upper Chamber Size and Expenditure as Percentage of GDP

This model measures the effect of upper chamber size on the expenditure as a
percentage of the GDP.

```{r, size="footnotesize", warning=F, cache=TRUE, echo = FALSE}
# Pooled effect: upper chamber size (K) on expenditure as a percentage of GDP
# (PCTGDP), using the matching rows of `fulldat`.
aux <- filter(fulldat, indepvar2 == "K", depvar2 == "PCTGDP")

# Multilevel model fitted by REML, with random intercepts for id_level2
# nested within id_level1.
# NOTE(review): `test` and `tdist` both request t-based inference here;
# confirm against the installed metafor version before dropping either.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  method = "REML",
  test = "t",
  tdist = TRUE,
  slab = paste(authoryear)
)
mod
```

And the forest plot:

```{r, fig.width=8, fig.height=4, fig.cap="Effect of Upper Chamber Size on Expenditure as Percentage of GDP",warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for the current `mod`; build_forest() is a helper defined
# outside this section, called here with NULL as its second argument.
build_forest(mod, NULL)
```

And to assess the possibility of publication bias, we add a funnel plot below:

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Upper Chamber Size on Expenditure as Percentage of GDP", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot for the current `mod` (upper chamber size vs. PCTGDP).
funnel(mod)
```


Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The rank correlation test for funnel plot asymmetry (a rank-based alternative to the @egger1997bias regression test) `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Lower Chamber Size and Expenditure per Capita (Instrumental Variables Only)

The model below evaluates only papers that use instrumental variables and
correlate lower chamber size with public expenditure per capita.

```{r, size="footnotesize", cache=TRUE, echo = FALSE}
# Pooled effect: lower chamber size (N) on expenditure per capita (ExpPC),
# keeping only rows of `fulldat` whose estimation method is "IV".
aux <- filter(
  fulldat,
  indepvar2 == "N",
  depvar2 == "ExpPC",
  method %in% "IV"
)

# Multilevel model fitted by REML, with random intercepts for id_level2
# nested within id_level1.
# NOTE(review): `test` and `tdist` both request t-based inference here;
# confirm against the installed metafor version before dropping either.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  method = "REML",
  test = "t",
  tdist = TRUE,
  slab = paste(authoryear)
)
mod
```

And the forest plot:

```{r, fig.width=8, fig.height=4, fig.cap="Effect of Lower Chamber Size on Expenditure Per Capita (IV Only)", warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for the current `mod`; build_forest() is a helper defined
# outside this section, called here with NULL as its second argument.
build_forest(mod, NULL)
```

And to assess the possibility of publication bias, we add a funnel plot below:

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Lower Chamber Size on Expenditure Per Capita (IV Only)", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot for the current `mod` (lower chamber size vs. ExpPC, IV only).
funnel(mod)
```

Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The rank correlation test for funnel plot asymmetry (a rank-based alternative to the @egger1997bias regression test) `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

## Lower Chamber Size and Log of Expenditure Per Capita (Regression Discontinuity Designs Only)

Lastly, we run a meta-analysis with papers that use regression discontinuity
designs and assess the effect of lower house size on log of expenditure per
capita.

```{r, size="footnotesize", cache=TRUE, echo = FALSE}
# Pooled effect: lower chamber size (N) on log expenditure per capita
# (logExpPC), keeping only rows of `fulldat` whose method is "RDD".
aux <- filter(
  fulldat,
  indepvar2 == "N",
  depvar2 == "logExpPC",
  method == "RDD"
)

# Multilevel model fitted by REML, with random intercepts for id_level2
# nested within id_level1.
# NOTE(review): `test` and `tdist` both request t-based inference here;
# confirm against the installed metafor version before dropping either.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  random = ~ 1 | id_level1/id_level2,
  data = aux,
  method = "REML",
  test = "t",
  tdist = TRUE,
  slab = paste(authoryear)
)
mod
```

And the forest plot:

```{r, fig.width=8, fig.height=4, fig.cap="Effect of Lower Chamber Size on Log Expenditure Per Capita (RDD Only)",warning=FALSE, cache=TRUE, echo = FALSE}
# Forest plot for the current `mod`; build_forest() is a helper defined
# outside this section, called here with NULL as its second argument.
build_forest(mod, NULL)
```

And to assess the possibility of publication bias, we add a funnel plot below:

```{r, fig.width=4, fig.height=4, fig.cap="Funnel Plot -- Effect of Lower Chamber Size on Log Expenditure Per Capita (RDD Only)", warning=FALSE, cache=TRUE, echo = FALSE}
# Funnel plot for the current `mod` (lower chamber size vs. logExpPC, RDD only).
funnel(mod)
```


Highlights:

1.  The results are highly heterogeneous: $Q =$ `r round(mod$QE, 2)`.
2.  The estimated SMD in the random effects model is $g =$ `r round(predict(mod)$pred,2)` ($SE =$ `r round(predict(mod)$se,3)`).
3.  The prediction interval ranges from `r round(predict(mod)$cr.lb,2)` to `r round(predict(mod)$cr.ub,2)`. Therefore, it `r ifelse(((predict(mod)$cr.lb)*(predict(mod)$cr.ub))>0,'does not encompass zero.','encompasses zero.')`
4. The rank correlation test for funnel plot asymmetry (a rank-based alternative to the @egger1997bias regression test) `r ifelse(ranktest(mod)$pval<0.05, 'confirmed', 'rejected')` the hypothesis of publication bias.

# Meta-Regressions

In the meta-regressions, we study the effects of a group of moderators on the
reported government spending data. We select the following moderators:

1. The independent variable (variable `indepvar2`):
  - `K`: Upper Chamber Size
  - `N`: Lower Chamber Size
  - `logN`: Log of Lower Chamber Size

2. Year that the paper was published (for working papers, the year it was posted online; variable `year`)

3. A dummy indicating whether the paper was published or not (variable `published`).

4. A dummy for non-majoritarian electoral systems (variable `elecsys2`).

5. A variable describing the papers' institutional design (variable `instdesign`), with:
 - `Bicameral`: a legislative system comprised of two chambers (e.g. US Federal Senate and House).
 - `Unicameral`: a system with only a lower chamber (e.g. US Municipalities).
 - `Mixed`: more than one system in the same sample (cross-country regressions).

6. The estimation method used in the papers (variable `method`):
  - `OLS`: Ordinary Least Squares in Cross-Sectional data.
  - `PANEL`: Time-Series Cross-Section models, with estimated fixed effects.
  - `IV`: Instrumental Variables models.
  - `RDD`: Regression Discontinuity Designs.

The results follow below for the three dependent variables: expenditure per
capita, log of expenditure per capita, and expenditure as a percentage of GDP.

## Meta-Regressions for Expenditure Per Capita

Here we study the impact of our moderators on expenditure per capita.

```{r, size="footnotesize", error=F, warning=F, cache=T, echo = FALSE}
# Meta-regression for expenditure per capita (ExpPC) on the main-coefficient
# sample `dat`. Moderators: regressor type, year, publication status,
# electoral system, estimation method, and institutional design.
# NOTE(review): "knha" is the Knapp-Hartung label from rma.uni(); confirm how
# the installed metafor version treats it in rma.mv() alongside `tdist`.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  mods = ~ indepvar2 + year + published + elecsys2 + method + instdesign,
  random = ~ 1 | id_level1/id_level2,
  data = dat,
  subset = dat$depvar2 == "ExpPC",
  slab = dat$authoryear,
  method = "REML",
  test = "knha",
  tdist = TRUE,
  sparse = TRUE
)

summary(mod)
```

For the meta-regressions of expenditure per capita, we find no significant
moderator. We also run the meta-regressions adding all coefficients included in
the papers. The results follow below:

```{r, size="footnotesize", warning=F, cache=T, echo = FALSE}
# Meta-regression for expenditure per capita (ExpPC) on `fulldat`, i.e. with
# all coefficients reported in the papers. Same moderators as the `dat` model.
# NOTE(review): "knha" is the Knapp-Hartung label from rma.uni(); confirm how
# the installed metafor version treats it in rma.mv() alongside `tdist`.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  mods = ~ indepvar2 + year + published + elecsys2 + method + instdesign,
  random = ~ 1 | id_level1/id_level2,
  data = fulldat,
  subset = fulldat$depvar2 == "ExpPC",
  slab = fulldat$authoryear,
  method = "REML",
  test = "knha",
  tdist = TRUE,
  sparse = TRUE
)

summary(mod)
```

With all coefficients, the results of the effect sizes on the expenditure per capita regressions are the following:

1.  Compared with upper chamber size, models with lower chamber size tend to detect significantly smaller effects.

2.  Year now has a positive effect on coefficient sizes.

3.  All other moderators were statistically insignificant.

## Meta-Regressions for Log of Expenditure Per Capita

The next meta-regression models use log of expenditure per capita as the
dependent variable.

```{r, size="footnotesize", warning=F, cache=T, echo = FALSE}
# Meta-regression for log expenditure per capita (logExpPC) on the
# main-coefficient sample `dat`. Moderators: regressor type, year, publication
# status, electoral system, estimation method, and institutional design.
# NOTE(review): "knha" is the Knapp-Hartung label from rma.uni(); confirm how
# the installed metafor version treats it in rma.mv() alongside `tdist`.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  mods = ~ indepvar2 + year + published + elecsys2 + method + instdesign,
  random = ~ 1 | id_level1/id_level2,
  data = dat,
  subset = dat$depvar2 == "logExpPC",
  slab = dat$authoryear,
  method = "REML",
  test = "knha",
  tdist = TRUE,
  sparse = TRUE
)

summary(mod)
```

The only significant result suggests that published papers tend to report a
smaller coefficient than unpublished papers.

Below we also run the meta-regressions with all coefficients included in the
papers.

```{r, size="footnotesize", warning=F, cache=T, echo = FALSE}
# Meta-regression for log expenditure per capita (logExpPC) on `fulldat`,
# i.e. with all coefficients reported in the papers.
# NOTE(review): "knha" is the Knapp-Hartung label from rma.uni(); confirm how
# the installed metafor version treats it in rma.mv() alongside `tdist`.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  mods = ~ indepvar2 + year + published + elecsys2 + method + instdesign,
  random = ~ 1 | id_level1/id_level2,
  data = fulldat,
  subset = fulldat$depvar2 == "logExpPC",
  slab = fulldat$authoryear,
  method = "REML",
  test = "knha",
  tdist = TRUE,
  sparse = TRUE
)

summary(mod)
```

When we include all coefficients in the meta-regression, we see that papers
that use panel data or regression discontinuity designs have smaller effects
than articles that employ simple OLS regressions. All other results are
insignificant.

## Meta-Regressions for Expenditure as a Percentage of GDP

Here we conduct the same analysis, but using public spending as a percentage of
GDP as our dependent variable.

```{r, size="footnotesize", warning=F, cache=T, echo = FALSE}
# Meta-regression for expenditure as a percentage of GDP (PCTGDP) on the
# main-coefficient sample `dat`. Moderators: regressor type, year, publication
# status, electoral system, estimation method, and institutional design.
# NOTE(review): "knha" is the Knapp-Hartung label from rma.uni(); confirm how
# the installed metafor version treats it in rma.mv() alongside `tdist`.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  mods = ~ indepvar2 + year + published + elecsys2 + method + instdesign,
  random = ~ 1 | id_level1/id_level2,
  data = dat,
  subset = dat$depvar2 == "PCTGDP",
  slab = dat$authoryear,
  method = "REML",
  test = "knha",
  tdist = TRUE,
  sparse = TRUE
)

summary(mod)
```

We find that:

1. Recent papers report smaller effect sizes than older studies.

2. Papers whose models include unicameral legislatures report slightly higher coefficients.

Below, we show the results for our meta-regression when we use our full study sample:

```{r, size="footnotesize", warning=F, cache=T, echo = FALSE}
# Meta-regression for expenditure as a percentage of GDP (PCTGDP) on
# `fulldat`, i.e. with all coefficients reported in the papers.
# NOTE(review): "knha" is the Knapp-Hartung label from rma.uni(); confirm how
# the installed metafor version treats it in rma.mv() alongside `tdist`.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  mods = ~ indepvar2 + year + published + elecsys2 + method + instdesign,
  random = ~ 1 | id_level1/id_level2,
  data = fulldat,
  subset = fulldat$depvar2 == "PCTGDP",
  slab = fulldat$authoryear,
  method = "REML",
  test = "knha",
  tdist = TRUE,
  sparse = TRUE
)
summary(mod)
```

The model indicates that:

1. Papers from more recent years find smaller effect sizes than older studies.

2. Studies using non-majoritarian systems find more positive effects between variables.

3. Papers whose models included unicameral legislatures report significantly higher coefficients.

## Meta-Regressions (All Coefficients)

Here we aggregate all the papers in our sample and run a multivariate meta-regression, controlling for:

1. The type of the dependent variable in the study (expenditure per capita, log
   expenditure per capita, and expenditure as percentage of GDP).

2. The type of the independent variable in the study (lower chamber size, upper
   chamber size, or log of lower chamber size).

3. The electoral system (Majoritarian versus Non-Majoritarian).

4. The year when the study was published.

5. Whether the study is a working paper or published work.

6. The institutional design: whether the legislature in the analysis is unicameral, bicameral, or mixed.

7. The estimation method used in the paper (OLS, PANEL, IV, or RDD).

The results follow below, and show null effects for all variables except for unicameral institutional designs.

```{r, size="footnotesize", warning=F, cache=T, echo = FALSE}
# Meta-regression over all outcomes at once on the main-coefficient sample
# `dat`: the outcome type (`depvar2`) enters as a moderator instead of being
# used to subset the data.
# NOTE(review): "knha" is the Knapp-Hartung label from rma.uni(); confirm how
# the installed metafor version treats it in rma.mv() alongside `tdist`.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  mods = ~ depvar2 + indepvar2 + year + published + elecsys2 + method +
    instdesign,
  random = ~ 1 | id_level1/id_level2,
  data = dat,
  slab = dat$authoryear,
  method = "REML",
  test = "knha",
  tdist = TRUE,
  sparse = TRUE
)

summary(mod)
```

In the restricted sample, which included only the main coefficients in the selected papers, unicameralism has a positive, significant effect. Also, papers that employ regression discontinuity design tend to have a negative effect, significant at the 10% level. We also run the same meta-regression with every coefficient reported in all papers. The results are:

```{r, size="footnotesize", warning=F, cache=T, echo = FALSE}
# Meta-regression over all outcomes at once on `fulldat` (every coefficient
# reported in the papers), with the outcome type (`depvar2`) as a moderator.
# NOTE(review): "knha" is the Knapp-Hartung label from rma.uni(); confirm how
# the installed metafor version treats it in rma.mv() alongside `tdist`.
mod <- rma.mv(
  yi = coef,
  V = VAR,
  mods = ~ depvar2 + indepvar2 + year + published + elecsys2 + method +
    instdesign,
  random = ~ 1 | id_level1/id_level2,
  data = fulldat,
  slab = fulldat$authoryear,
  method = "REML",
  test = "knha",
  tdist = TRUE,
  sparse = TRUE
)
summary(mod)
```
In the full model, we see that:

1. Compared with papers that employ `OLS`, those that use `PANEL`, `IV` and
   `RDD` report significantly smaller coefficients.

2. Evidence from unicameral systems presents significantly larger effect sizes.

3. All other moderators are insignificant.

## Comparing Coefficient Sizes and Dependent Variables

Finally, we run a simple linear regression to check whether there are
significant differences between effect sizes for our dependent variables.
Although the meta-regressions show that the difference is statistically
negligible, we can still witness larger or smaller coefficient sizes in
different models. The results below show that the differences are indeed mostly
negligible.

```{r, size="footnotesize", echo = FALSE, cache=TRUE}
# Linear regression of the coefficients in `dat` on the outcome type.
# PCTGDP is listed first in `levels`, so it is the baseline category under
# R's default treatment contrasts.
aux <- dat
aux$depvar2 <- factor(
  aux$depvar2,
  levels = c("PCTGDP", "ExpPC", "logExpPC")
)
lm(coef ~ depvar2, data = aux) %>%
  summary()
```


